This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / fs / intermezzo / vfs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *  Copyright (C) 2000 Stelias Computing, Inc.
6  *  Copyright (C) 2000 Red Hat, Inc.
7  *
8  *   This file is part of InterMezzo, http://www.inter-mezzo.org.
9  *
10  *   InterMezzo is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   InterMezzo is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with InterMezzo; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  *
23  * vfs.c
24  *
25  * This file implements kernel downcalls from lento.
26  *
27  * Author: Rob Simmonds <simmonds@stelias.com>
28  *         Andreas Dilger <adilger@stelias.com>
29  * Copyright (C) 2000 Stelias Computing Inc
30  * Copyright (C) 2000 Red Hat Inc.
31  *
32  * Extended attribute support
33  * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
34  *
35  * This code is based on code from namei.c in the linux file system;
36  * see copyright notice below.
37  */
38
39 /** namei.c copyright **/
40
41 /*
42  *  linux/fs/namei.c
43  *
44  *  Copyright (C) 1991, 1992  Linus Torvalds
45  */
46 /*
47  * Some corrections by tytso.
48  */
49
50 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
51  * lookup logic.
52  */
53
54 /** end of namei.c copyright **/
55
56 #include <linux/mm.h>
57 #include <linux/proc_fs.h>
58 #include <linux/quotaops.h>
59
60 #include <asm/uaccess.h>
61 #include <asm/unaligned.h>
62 #include <asm/semaphore.h>
63 #include <asm/pgtable.h>
64
65 #include <linux/file.h>
66 #include <linux/fs.h>
67 #include <linux/namei.h>
68 #include <linux/genhd.h>
69
70 #include "intermezzo_fs.h"
71 #include "intermezzo_psdev.h"
72
73 #ifdef CONFIG_FS_EXT_ATTR
74 # include <linux/ext_attr.h>
75
76 # if 0 /* was a broken check for Posix ACLs */
77 #  include <linux/posix_acl.h>
78 # endif
79 #endif
80
81 extern struct inode_operations presto_sym_iops;
82
83 /* Write the last_rcvd values to the last_rcvd file.  We don't know what the
84  * UUID or last_ctime values are, so we have to read from the file first
85  * (sigh). 
86  * exported for branch_reinter in kml_reint.c*/
87 int presto_write_last_rcvd(struct rec_info *recinfo,
88                            struct presto_file_set *fset,
89                            struct lento_vfs_context *info)
90 {
91         int rc;
92         struct izo_rcvd_rec rcvd_rec;
93
94         ENTRY;
95
96         memset(&rcvd_rec, 0, sizeof(rcvd_rec));
97         memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid));
98         rcvd_rec.lr_remote_recno = HTON__u64(info->recno);
99         rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset);
100         rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno);
101         rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size);
102
103         rc = izo_rcvd_write(fset, &rcvd_rec);
104         if (rc < 0) {
105                 /* izo_rcvd_write returns negative errors and non-negative
106                  * offsets */
107                 CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc);
108                 EXIT;
109                 return rc;
110         }
111         EXIT;
112         return 0;
113 }
114
115 /*
116  * It's inline, so penalty for filesystems that don't use sticky bit is
117  * minimal.
118  */
119 static inline int check_sticky(struct inode *dir, struct inode *inode)
120 {
121         if (!(dir->i_mode & S_ISVTX))
122                 return 0;
123         if (inode->i_uid == current->fsuid)
124                 return 0;
125         if (dir->i_uid == current->fsuid)
126                 return 0;
127         return !capable(CAP_FOWNER);
128 }
129
130 /* from linux/fs/namei.c */
131 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
132 {
133         int error;
134         if (!victim->d_inode || victim->d_parent->d_inode != dir)
135                 return -ENOENT;
136         error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
137         if (error)
138                 return error;
139         if (IS_APPEND(dir))
140                 return -EPERM;
141         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
142             IS_IMMUTABLE(victim->d_inode))
143                 return -EPERM;
144         if (isdir) {
145                 if (!S_ISDIR(victim->d_inode->i_mode))
146                         return -ENOTDIR;
147                 if (IS_ROOT(victim))
148                         return -EBUSY;
149         } else if (S_ISDIR(victim->d_inode->i_mode))
150                 return -EISDIR;
151         return 0;
152 }
153
154 /* from linux/fs/namei.c */
155 static inline int may_create(struct inode *dir, struct dentry *child) {
156         if (child->d_inode)
157                 return -EEXIST;
158         if (IS_DEADDIR(dir))
159                 return -ENOENT;
160         return permission(dir,MAY_WRITE | MAY_EXEC, NULL);
161 }
162
#ifdef PRESTO_DEBUG
/* Debug-only fault injection for the block device under a fileset.
 *
 * The loop_discard_io() function is available via a kernel patch to the
 * loop block device.  It "works" by accepting writes, but throwing them
 * away, rather than trying to write them to disk.  The old method worked
 * by setting the underlying device read-only, but that has the problem
 * that dirty buffers are kept in memory, and ext3 didn't like that at all.
 *
 * Note that without CONFIG_LOOP_DISCARD the "fail" argument is ignored and
 * the device is always set read-only.
 */
#ifdef CONFIG_LOOP_DISCARD
#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail)
#else
#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1)
#endif

/* If a breakpoint has been set via /proc/sys/intermezzo/intermezzoX/errorval,
 * that is the same as "value", the underlying device will "fail" now.
 *
 * Nothing happens when no breakpoint is set, when it differs from value, or
 * when the device is already read-only.  After failing the device the
 * channel's errorval is replaced by the negated device number.
 */
inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
                                     unsigned long value)
{
        int minor = presto_f2m(fset);
        int breakpoint = izo_channels[minor].uc_errorval;
        struct block_device *bdev = fset->fset_dentry->d_inode->i_sb->s_bdev;
        char namebuf[BDEVNAME_SIZE];

        if (!breakpoint || breakpoint != (long)value || bdev_read_only(bdev))
                return;

        CDEBUG(D_SUPER, "setting device %s read only\n",
                        bdevname(bdev, namebuf));
        BLKDEV_FAIL(bdev, 1);
        izo_channels[minor].uc_errorval = -bdev->bd_dev;
}
#else
/* without PRESTO_DEBUG the fault-injection hook compiles to nothing */
#define presto_debug_fail_blkdev(dev,value) do {} while (0)
#endif
197
198
199 static inline int presto_do_kml(struct lento_vfs_context *info,
200                                 struct dentry *dentry)
201 {
202         if ( ! (info->flags & LENTO_FL_KML) )
203                 return 0;
204         if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
205                 return 0;
206         return 1;
207 }
208
209 static inline int presto_do_rcvd(struct lento_vfs_context *info,
210                                  struct dentry *dentry)
211 {
212         if ( ! (info->flags & LENTO_FL_EXPECT) ) 
213                 return 0;
214         if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
215                 return 0;
216         return 1;
217 }
218
219
220 /* XXX fixme: this should not fail, all these dentries are in memory
221    when _we_ call this */
222 int presto_settime(struct presto_file_set *fset, 
223                    struct dentry *newobj,
224                    struct dentry *parent,
225                    struct dentry *target,
226                    struct lento_vfs_context *ctx, 
227                    int valid)
228 {
229         int error = 0;
230         struct dentry *dentry;
231         struct inode *inode;
232         struct inode_operations *iops;
233         struct iattr iattr;
234
235         ENTRY;
236         if (ctx->flags &  LENTO_FL_IGNORE_TIME ) { 
237                 EXIT;
238                 return 0;
239         }
240
241         iattr.ia_ctime = ctx->updated_time;
242         iattr.ia_mtime = ctx->updated_time;
243         iattr.ia_valid = valid;
244
245         while (1) {
246                 if (parent && ctx->flags & LENTO_FL_TOUCH_PARENT) {
247                         dentry = parent;
248                         parent = NULL;
249                 } else if (newobj && ctx->flags & LENTO_FL_TOUCH_NEWOBJ) {
250                         dentry = newobj;
251                         newobj = NULL;
252                 } else if (target) {
253                         dentry = target;
254                         target = NULL;
255                 } else
256                         break;
257
258                 inode = dentry->d_inode;
259
260                 error = -EROFS;
261                 if (IS_RDONLY(inode)) {
262                         EXIT;
263                         return -EROFS;
264                 }
265
266                 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
267                         EXIT;
268                         return -EPERM;
269                 }
270
271                 error = -EPERM;
272                 iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
273                 if (!iops) { 
274                         EXIT;
275                         return error;
276                 }
277
278                 if (iops->setattr != NULL)
279                         error = iops->setattr(dentry, &iattr);
280                 else {
281                         error = 0;
282                         inode_setattr(dentry->d_inode, &iattr);
283                 }
284         }
285         EXIT;
286         return error;
287 }
288
289 void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb)
290 {
291         rb->rb_mode = (__u32)inode->i_mode;
292         rb->rb_rdev = (__u32)old_encode_dev(inode->i_rdev);
293         rb->rb_uid  = (__u64)inode->i_uid;
294         rb->rb_gid  = (__u64)inode->i_gid;
295 }
296
297
298 int presto_do_close(struct presto_file_set *fset, struct file *file)
299 {
300         struct rec_info rec;
301         int rc = -ENOSPC; 
302         void *handle;
303         struct inode *inode = file->f_dentry->d_inode;
304         struct presto_file_data *fdata = 
305                 (struct presto_file_data *)file->private_data;
306
307         ENTRY;
308         presto_getversion(&fdata->fd_info.remote_version, inode);
309
310         rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
311         if (rc) { 
312                 EXIT;
313                 return rc;
314         }
315
316         handle = presto_trans_start(fset, file->f_dentry->d_inode, 
317                                             KML_OPCODE_RELEASE);
318         if ( IS_ERR(handle) ) {
319                 CERROR("presto_release: no space for transaction\n");
320                 return rc;
321         }
322
323         if (fdata->fd_info.flags & LENTO_FL_KML) 
324                 rc = presto_journal_close(&rec, fset, fdata, file->f_dentry,
325                                           &fdata->fd_version, 
326                                           &fdata->fd_info.remote_version);
327         if (rc) { 
328                 CERROR("presto_close: cannot journal close\n");
329                 goto out;
330         }
331
332         if (fdata->fd_info.flags & LENTO_FL_EXPECT) 
333                 rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info);
334
335         if (rc) { 
336                 CERROR("presto_close: cannot journal last_rcvd\n");
337                 goto out;
338         }
339         presto_trans_commit(fset, handle); 
340         
341         /* cancel the LML record */ 
342         handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE);
343         if ( IS_ERR(handle) ) {
344                 CERROR("presto_release: no space for clear\n");
345                 return -ENOSPC;
346         }
347
348         rc = presto_clear_lml_close(fset, fdata->fd_lml_offset); 
349         if (rc < 0 ) { 
350                 CERROR("presto_close: cannot journal close\n");
351                 goto out;
352         }
353         presto_truncate_lml(fset);
354
355  out:
356         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
357         presto_trans_commit(fset, handle); 
358         EXIT;
359         return rc;
360 }
361
362 int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
363                       struct iattr *iattr, struct lento_vfs_context *info)
364 {
365         struct rec_info rec;
366         struct inode *inode = dentry->d_inode;
367         struct inode_operations *iops;
368         int error;
369         struct presto_version old_ver, new_ver;
370         struct izo_rollback_data rb;
371         void *handle;
372         loff_t old_size=inode->i_size;
373
374         ENTRY;
375         error = -EROFS;
376         if (IS_RDONLY(inode)) {
377                 EXIT;
378                 return -EROFS;
379         }
380
381         if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
382                 EXIT;
383                 return -EPERM;
384         }
385
386         presto_getversion(&old_ver, dentry->d_inode);
387         izo_get_rollback_data(dentry->d_inode, &rb);
388         error = -EPERM;
389         iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
390
391         error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
392         if (error) {
393                 EXIT;
394                 return error;
395         }
396
397         if  (iattr->ia_valid & ATTR_SIZE) {
398                 if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0)
399                         CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) "
400                                "failed\n", dentry->d_inode->i_ino);
401                 handle = presto_trans_start(fset, dentry->d_inode,
402                                             KML_OPCODE_TRUNC);
403         } else {
404                 handle = presto_trans_start(fset, dentry->d_inode,
405                                             KML_OPCODE_SETATTR);
406         }
407
408         if ( IS_ERR(handle) ) {
409                 CERROR("presto_do_setattr: no space for transaction\n");
410                 presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
411                 return -ENOSPC;
412         }
413
414         if (dentry->d_inode && iops && iops->setattr) {
415                 error = iops->setattr(dentry, iattr);
416         } else {
417                 error = inode_change_ok(dentry->d_inode, iattr);
418                 if (!error) 
419                         inode_setattr(inode, iattr);
420         }
421
422         if (!error && (iattr->ia_valid & ATTR_SIZE))
423                 vmtruncate(inode, iattr->ia_size);
424
425         if (error) {
426                 EXIT;
427                 goto exit;
428         }
429
430         presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10);
431
432         if ( presto_do_kml(info, dentry) ) {
433                 if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) {
434                         /* Journal a close whenever we see a potential truncate
435                         * At the receiving end, lento should explicitly remove
436                         * ATTR_SIZE from the list of valid attributes */
437                         presto_getversion(&new_ver, inode);
438                         error = presto_journal_close(&rec, fset, NULL, dentry,
439                                                      &old_ver, &new_ver);
440                 }
441
442                 if (!error)
443                         error = presto_journal_setattr(&rec, fset, dentry,
444                                                        &old_ver, &rb, iattr);
445         }
446
447         presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20);
448         if ( presto_do_rcvd(info, dentry) )
449                 error = presto_write_last_rcvd(&rec, fset, info);
450
451         presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30);
452
453         EXIT;
454 exit:
455         presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
456         presto_trans_commit(fset, handle);
457         return error;
458 }
459
460 int lento_setattr(const char *name, struct iattr *iattr,
461                   struct lento_vfs_context *info)
462 {
463         struct nameidata nd;
464         struct dentry *dentry;
465         struct presto_file_set *fset;
466         int error;
467 #if 0 /* was a broken check for Posix ACLs */
468         int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL;
469 #endif
470
471         ENTRY;
472         CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n",
473                name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid,
474                iattr->ia_gid, iattr->ia_size);
475         CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n",
476                iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec, iattr->ia_ctime.tv_sec,
477                iattr->ia_attr_flags);
478         CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n",
479                info->slot_offset, info->recno, info->flags);
480
481         lock_kernel();
482         error = presto_walk(name, &nd);
483         if (error) {
484                 EXIT;
485                 goto exit;
486         }
487         dentry = nd.dentry;
488         
489         fset = presto_fset(dentry);
490         error = -EINVAL;
491         if ( !fset ) {
492                 CERROR("No fileset!\n");
493                 EXIT;
494                 goto exit_lock;
495         }
496
497         /* NOTE: this prevents us from changing the filetype on setattr,
498          *       as we normally only want to change permission bits.
499          *       If this is not correct, then we need to fix the perl code
500          *       to always send the file type OR'ed with the permission.
501          */
502         if (iattr->ia_valid & ATTR_MODE) {
503                 int set_mode = iattr->ia_mode;
504                 iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) |
505                                  (dentry->d_inode->i_mode & ~S_IALLUGO);
506                 CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n",
507                        dentry->d_inode->i_mode, set_mode, iattr->ia_mode);
508 #if 0 /* was a broken check for Posix ACLs */
509                 /* ACl code interacts badly with setattr 
510                  * since it tries to modify the ACL using 
511                  * set_ext_attr which recurses back into presto.  
512                  * This only happens if ATTR_MODE is set.
513                  * Here we are doing a "forced" mode set 
514                  * (initiated by lento), so we disable the 
515                  * set_posix_acl operation which 
516                  * prevents such recursion.  -SHP
517                  *
518                  * This will probably still be required when native
519                  * acl journalling is in place.
520                  */
521                 set_posix_acl=dentry->d_inode->i_op->set_posix_acl;
522                 dentry->d_inode->i_op->set_posix_acl=NULL;
523 #endif
524         }
525
526         error = presto_do_setattr(fset, dentry, iattr, info);
527
528         if (info->flags & LENTO_FL_SET_DDFILEID) {
529                 struct presto_dentry_data *dd = presto_d2d(dentry);
530                 if (dd) {
531                         dd->remote_ino = info->remote_ino;
532                         dd->remote_generation = info->remote_generation;
533                 }
534         }
535
536 #if 0 /* was a broken check for Posix ACLs */
537         /* restore the inode_operations if we changed them*/
538         if (iattr->ia_valid & ATTR_MODE) 
539                 dentry->d_inode->i_op->set_posix_acl=set_posix_acl;
540 #endif
541
542
543         EXIT;
544 exit_lock:
545         path_release(&nd);
546 exit:
547         unlock_kernel();
548         return error;
549 }
550
551 int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
552                      struct dentry *dentry, int mode,
553                      struct lento_vfs_context *info)
554 {
555         struct rec_info rec;
556         int error;
557         struct presto_version tgt_dir_ver, new_file_ver;
558         struct inode_operations *iops;
559         void *handle;
560
561         ENTRY;
562         mode &= S_IALLUGO;
563         mode |= S_IFREG;
564
565         //        down(&dir->d_inode->i_zombie);
566         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
567         if (error) {
568                 EXIT;
569                 //                up(&dir->d_inode->i_zombie);
570                 return error;
571         }
572
573         error = may_create(dir->d_inode, dentry);
574         if (error) {
575                 EXIT;
576                 goto exit_pre_lock;
577         }
578
579         error = -EPERM;
580         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
581         if (!iops->create) {
582                 EXIT;
583                 goto exit_pre_lock;
584         }
585
586         presto_getversion(&tgt_dir_ver, dir->d_inode);
587         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE);
588         if ( IS_ERR(handle) ) {
589                 EXIT;
590                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
591                 CERROR("presto_do_create: no space for transaction\n");
592                 error=-ENOSPC;
593                 goto exit_pre_lock;
594         }
595         DQUOT_INIT(dir->d_inode);
596         lock_kernel();
597         error = iops->create(dir->d_inode, dentry, mode, NULL);
598         if (error) {
599                 EXIT;
600                 goto exit_lock;
601         }
602
603         if (dentry->d_inode) {
604                 struct presto_cache *cache = fset->fset_cache;
605                 /* was this already done? */
606                 presto_set_ops(dentry->d_inode, cache->cache_filter);
607
608                 filter_setup_dentry_ops(cache->cache_filter, 
609                                         dentry->d_op, 
610                                         &presto_dentry_ops);
611                 dentry->d_op = filter_c2udops(cache->cache_filter);
612
613                 /* if Lento creates this file, we won't have data */
614                 if ( ISLENTO(presto_c2m(cache)) ) {
615                         presto_set(dentry, PRESTO_ATTR);
616                 } else {
617                         presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
618                 }
619         }
620
621         info->flags |= LENTO_FL_TOUCH_PARENT;
622         error = presto_settime(fset, NULL, dir, dentry,
623                                info, ATTR_CTIME | ATTR_MTIME);
624         if (error) { 
625                 EXIT;
626                 goto exit_lock;
627         }
628
629         presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10);
630
631         if ( presto_do_kml(info, dentry) ) { 
632                 presto_getversion(&new_file_ver, dentry->d_inode);
633                 error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver,
634                                               &new_file_ver, 
635                                               dentry->d_inode->i_mode);
636         }
637
638         presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20);
639
640         if ( presto_do_rcvd(info, dentry) )
641                 error = presto_write_last_rcvd(&rec, fset, info);
642
643         presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30);
644
645         /* add inode dentry */
646         if (fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) { 
647                 struct dentry *d;
648                 d = fset->fset_cache->cache_filter->o_trops->tr_add_ilookup
649                         (dir->d_inode->i_sb->s_root, dentry);
650         }
651
652         EXIT;
653
654  exit_lock:
655         unlock_kernel();
656         presto_trans_commit(fset, handle);
657  exit_pre_lock:
658         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
659         //        up(&dir->d_inode->i_zombie);
660         return error;
661 }
662
663 int lento_create(const char *name, int mode, struct lento_vfs_context *info)
664 {
665         int error;
666         struct nameidata nd;
667         char * pathname;
668         struct dentry *dentry;
669         struct presto_file_set *fset;
670
671         ENTRY;
672         pathname = getname(name);
673         error = PTR_ERR(pathname);
674         if (IS_ERR(pathname)) {
675                 EXIT;
676                 goto exit;
677         }
678
679         /* this looks up the parent */
680         error = path_lookup(pathname,  LOOKUP_PARENT, &nd);
681         if (error) {
682                 EXIT;
683                 goto exit;
684         }
685         dentry = lookup_create(&nd, 0);
686         error = PTR_ERR(dentry);
687         if (IS_ERR(dentry)) {
688                 EXIT;
689                 goto exit_lock;
690         }
691
692         fset = presto_fset(dentry);
693         error = -EINVAL;
694         if ( !fset ) {
695                 CERROR("No fileset!\n");
696                 EXIT;
697                 goto exit_lock;
698         }
699         error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG,
700                                  info);
701
702         EXIT;
703
704  exit_lock:
705         path_release (&nd);
706         dput(dentry); 
707         up(&dentry->d_parent->d_inode->i_sem);
708         putname(pathname);
709 exit:
710         return error;
711 }
712
713 int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry,
714                    struct dentry *dir, struct dentry *new_dentry,
715                    struct lento_vfs_context *info)
716 {
717         struct rec_info rec;
718         struct inode *inode;
719         int error;
720         struct inode_operations *iops;
721         struct presto_version tgt_dir_ver;
722         struct presto_version new_link_ver;
723         void *handle;
724
725         //        down(&dir->d_inode->i_zombie);
726         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
727         if (error) {
728                 EXIT;
729                 //                up(&dir->d_inode->i_zombie);
730                 return error;
731         }
732         error = -ENOENT;
733         inode = old_dentry->d_inode;
734         if (!inode)
735                 goto exit_lock;
736
737         error = may_create(dir->d_inode, new_dentry);
738         if (error)
739                 goto exit_lock;
740
741         error = -EXDEV;
742         if (dir->d_inode->i_sb != inode->i_sb)
743                 goto exit_lock;
744
745         /*
746          * A link to an append-only or immutable file cannot be created.
747          */
748         error = -EPERM;
749         if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
750                 EXIT;
751                 goto exit_lock;
752         }
753
754         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
755         if (!iops->link) {
756                 EXIT;
757                 goto exit_lock;
758         }
759
760
761         presto_getversion(&tgt_dir_ver, dir->d_inode);
762         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK);
763         if ( IS_ERR(handle) ) {
764                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
765                 CERROR("presto_do_link: no space for transaction\n");
766                 return -ENOSPC;
767         }
768
769         DQUOT_INIT(dir->d_inode);
770         lock_kernel();
771         error = iops->link(old_dentry, dir->d_inode, new_dentry);
772         unlock_kernel();
773         if (error) {
774                 EXIT;
775                 goto exit_lock;
776         }
777
778         /* link dd data to that of existing dentry */
779         old_dentry->d_op->d_release(new_dentry); 
780         if (!presto_d2d(old_dentry)) 
781                 BUG();
782         presto_d2d(old_dentry)->dd_count++;
783
784         new_dentry->d_fsdata = presto_d2d(old_dentry);
785
786         info->flags |= LENTO_FL_TOUCH_PARENT;
787         error = presto_settime(fset, NULL, dir, new_dentry,
788                                info, ATTR_CTIME);
789         if (error) { 
790                 EXIT;
791                 goto exit_lock;
792         }
793
794         presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10);
795         presto_getversion(&new_link_ver, new_dentry->d_inode);
796         if ( presto_do_kml(info, old_dentry) )
797                 error = presto_journal_link(&rec, fset, old_dentry, new_dentry,
798                                             &tgt_dir_ver, &new_link_ver);
799
800         presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20);
801         if ( presto_do_rcvd(info, old_dentry) )
802                 error = presto_write_last_rcvd(&rec, fset, info);
803
804         presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30);
805         EXIT;
806         presto_trans_commit(fset, handle);
807 exit_lock:
808         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
809         //        up(&dir->d_inode->i_zombie);
810         return error;
811 }
812
813
814 int lento_link(const char * oldname, const char * newname, 
815                          struct lento_vfs_context *info)
816 {
817         int error;
818         char * to;
819         struct presto_file_set *fset;
820
821         to = getname(newname);
822         error = PTR_ERR(to);
823         if (!IS_ERR(to)) {
824                 struct dentry *new_dentry;
825                 struct nameidata nd, old_nd;
826
827                 error = __user_walk(oldname, 0, &old_nd);
828                 if (error)
829                         goto exit;
830                 error = path_lookup(to, LOOKUP_PARENT, &nd);
831                 if (error)
832                         goto out;
833                 error = -EXDEV;
834                 if (old_nd.mnt != nd.mnt)
835                         goto out;
836                 new_dentry = lookup_create(&nd, 0);
837                 error = PTR_ERR(new_dentry);
838
839                 if (!IS_ERR(new_dentry)) {
840                         fset = presto_fset(new_dentry);
841                         error = -EINVAL;
842                         if ( !fset ) {
843                                 CERROR("No fileset!\n");
844                                 EXIT;
845                                 goto out2;
846                         }
847                         error = presto_do_link(fset, old_nd.dentry, 
848                                                nd.dentry,
849                                                new_dentry, info);
850                         dput(new_dentry);
851                 }
852         out2:
853                 up(&nd.dentry->d_inode->i_sem);
854                 path_release(&nd);
855         out:
856                 path_release(&old_nd);
857         exit:
858                 putname(to);
859         }
860         return error;
861 }
862
863 int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
864                      struct dentry *dentry, struct lento_vfs_context *info)
865 {
866         struct rec_info rec;
867         struct inode_operations *iops;
868         struct presto_version tgt_dir_ver, old_file_ver;
869         struct izo_rollback_data rb;
870         void *handle;
871         int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0;
872         char *old_target = NULL;
873
874         ENTRY;
875         //        down(&dir->d_inode->i_zombie);
876         error = may_delete(dir->d_inode, dentry, 0);
877         if (error) {
878                 EXIT;
879                 //                up(&dir->d_inode->i_zombie);
880                 return error;
881         }
882
883         error = -EPERM;
884         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
885         if (!iops->unlink) {
886                 EXIT;
887                 //                up(&dir->d_inode->i_zombie);
888                 return error;
889         }
890
891         error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW); 
892         if (error) {
893                 EXIT;
894                 //                up(&dir->d_inode->i_zombie);
895                 return error;
896         }
897
898
899         if (presto_d2d(dentry)) { 
900                 struct presto_dentry_data *dd = presto_d2d(dentry); 
901                 struct dentry *de = dd->dd_inodentry;
902                 if (de && dentry->d_inode->i_nlink == 1) { 
903                         dd->dd_count--;
904                         dd->dd_inodentry = NULL; 
905                         de->d_fsdata = NULL; 
906                         atomic_dec(&de->d_inode->i_count); 
907                         de->d_inode = NULL;
908                         dput(de); 
909                 }
910         }
911
912         presto_getversion(&tgt_dir_ver, dir->d_inode);
913         presto_getversion(&old_file_ver, dentry->d_inode);
914         izo_get_rollback_data(dentry->d_inode, &rb);
915         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK);
916         if ( IS_ERR(handle) ) {
917                 presto_release_space(fset->fset_cache, PRESTO_REQLOW); 
918                 CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n");
919                 //                up(&dir->d_inode->i_zombie);
920                 return -ENOSPC;
921         }
922         DQUOT_INIT(dir->d_inode);
923         if (d_mountpoint(dentry))
924                 error = -EBUSY;
925         else {
926                 lock_kernel();
927                 linkno = dentry->d_inode->i_nlink;
928                 if (linkno > 1) {
929                         dget(dentry);
930                 }
931
932                 if (S_ISLNK(dentry->d_inode->i_mode)) {
933                         mm_segment_t old_fs;
934                         struct inode_operations *riops;
935                         riops = filter_c2csiops(fset->fset_cache->cache_filter);
936
937                         PRESTO_ALLOC(old_target, PATH_MAX);
938                         if (old_target == NULL) {
939                                 error = -ENOMEM;
940                                 EXIT;
941                                 goto exit;
942                         }
943
944                         old_fs = get_fs();
945                         set_fs(get_ds());
946
947                         if (riops->readlink == NULL)
948                                 CERROR("InterMezzo %s: no readlink iops.\n",
949                                        __FUNCTION__);
950                         else
951                                 old_targetlen =
952                                         riops->readlink(dentry, old_target,
953                                                         PATH_MAX);
954                         if (old_targetlen < 0) {
955                                 CERROR("InterMezzo: readlink failed: %ld\n",
956                                        PTR_ERR(old_target));
957                                 PRESTO_FREE(old_target, PATH_MAX);
958                                 old_target = NULL;
959                                 old_targetlen = 0;
960                         }
961                         set_fs(old_fs);
962                 }
963
964                 do_kml = presto_do_kml(info, dir);
965                 do_rcvd = presto_do_rcvd(info, dir);
966                 error = iops->unlink(dir->d_inode, dentry);
967                 unlock_kernel();
968         }
969
970         if (linkno > 1) { 
971                 /* FIXME: Combine this with the next call? */
972                 error = presto_settime(fset, NULL, NULL, dentry,
973                                        info, ATTR_CTIME);
974                 dput(dentry); 
975                 if (error) { 
976                         EXIT;
977                         goto exit;
978                 }
979         }
980
981         error = presto_settime(fset, NULL, NULL, dir,
982                                info, ATTR_CTIME | ATTR_MTIME);
983         if (error) { 
984                 EXIT;
985                 goto exit;
986         }
987
988         //        up(&dir->d_inode->i_zombie);
989
990         presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10);
991         if ( do_kml )
992                 error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver,
993                                               &old_file_ver, &rb, dentry,
994                                               old_target, old_targetlen);
995         presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20);
996         if ( do_rcvd ) { 
997                 error = presto_write_last_rcvd(&rec, fset, info);
998         }
999         presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30);
1000         EXIT;
1001 exit:
1002         presto_release_space(fset->fset_cache, PRESTO_REQLOW); 
1003         presto_trans_commit(fset, handle);
1004         if (old_target != NULL)
1005                 PRESTO_FREE(old_target, PATH_MAX);
1006         return error;
1007 }
1008
1009
1010 int lento_unlink(const char *pathname, struct lento_vfs_context *info)
1011 {
1012         int error = 0;
1013         char * name;
1014         struct dentry *dentry;
1015         struct nameidata nd;
1016         struct presto_file_set *fset;
1017
1018         ENTRY;
1019
1020         name = getname(pathname);
1021         if(IS_ERR(name))
1022                 return PTR_ERR(name);
1023
1024         error = path_lookup(name, LOOKUP_PARENT, &nd);
1025         if (error)
1026                 goto exit;
1027         error = -EISDIR;
1028         if (nd.last_type != LAST_NORM)
1029                 goto exit1;
1030         down(&nd.dentry->d_inode->i_sem);
1031         dentry = lookup_hash(&nd.last, nd.dentry);
1032         error = PTR_ERR(dentry);
1033         if (!IS_ERR(dentry)) {
1034                 fset = presto_fset(dentry);
1035                 error = -EINVAL;
1036                 if ( !fset ) {
1037                         CERROR("No fileset!\n");
1038                         EXIT;
1039                         goto exit2;
1040                 }
1041                 /* Why not before? Because we want correct error value */
1042                 if (nd.last.name[nd.last.len])
1043                         goto slashes;
1044                 error = presto_do_unlink(fset, nd.dentry, dentry, info);
1045                 if (!error)
1046                         d_delete(dentry);
1047         exit2:
1048                 EXIT;
1049                 dput(dentry);
1050         }
1051         up(&nd.dentry->d_inode->i_sem);
1052 exit1:
1053         path_release(&nd);
1054 exit:
1055         putname(name);
1056
1057         return error;
1058
1059 slashes:
1060         error = !dentry->d_inode ? -ENOENT :
1061                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1062         goto exit2;
1063 }
1064
1065 int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
1066                       struct dentry *dentry, const char *oldname,
1067                       struct lento_vfs_context *info)
1068 {
1069         struct rec_info rec;
1070         int error;
1071         struct presto_version tgt_dir_ver, new_link_ver;
1072         struct inode_operations *iops;
1073         void *handle;
1074
1075         ENTRY;
1076         //        down(&dir->d_inode->i_zombie);
1077         /* record + max path len + space to free */ 
1078         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1079         if (error) {
1080                 EXIT;
1081                 //                up(&dir->d_inode->i_zombie);
1082                 return error;
1083         }
1084
1085         error = may_create(dir->d_inode, dentry);
1086         if (error) {
1087                 EXIT;
1088                 goto exit_lock;
1089         }
1090
1091         error = -EPERM;
1092         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
1093         if (!iops->symlink) {
1094                 EXIT;
1095                 goto exit_lock;
1096         }
1097
1098         presto_getversion(&tgt_dir_ver, dir->d_inode);
1099         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK);
1100         if ( IS_ERR(handle) ) {
1101                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1102                 CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n"); 
1103                 EXIT;
1104                 //                up(&dir->d_inode->i_zombie);
1105                 return -ENOSPC;
1106         }
1107         DQUOT_INIT(dir->d_inode);
1108         lock_kernel();
1109         error = iops->symlink(dir->d_inode, dentry, oldname);
1110         if (error) {
1111                 EXIT;
1112                 goto exit;
1113         }
1114
1115         if (dentry->d_inode) {
1116                 struct presto_cache *cache = fset->fset_cache;
1117                 
1118                 presto_set_ops(dentry->d_inode, cache->cache_filter);
1119
1120                 filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, 
1121                                         &presto_dentry_ops);
1122                 dentry->d_op = filter_c2udops(cache->cache_filter);
1123                 /* XXX ? Cache state ? if Lento creates a symlink */
1124                 if ( ISLENTO(presto_c2m(cache)) ) {
1125                         presto_set(dentry, PRESTO_ATTR);
1126                 } else {
1127                         presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
1128                 }
1129         }
1130
1131         info->flags |= LENTO_FL_TOUCH_PARENT;
1132         error = presto_settime(fset, NULL, dir, dentry,
1133                                info, ATTR_CTIME | ATTR_MTIME);
1134         if (error) { 
1135                 EXIT;
1136                 goto exit;
1137         }
1138
1139         presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10);
1140         presto_getversion(&new_link_ver, dentry->d_inode);
1141         if ( presto_do_kml(info, dentry) )
1142                 error = presto_journal_symlink(&rec, fset, dentry, oldname,
1143                                                &tgt_dir_ver, &new_link_ver);
1144
1145         presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20);
1146         if ( presto_do_rcvd(info, dentry) )
1147                 error = presto_write_last_rcvd(&rec, fset, info);
1148
1149         presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30);
1150         EXIT;
1151 exit:
1152         unlock_kernel();
1153         presto_trans_commit(fset, handle);
1154  exit_lock:
1155         presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1156         //        up(&dir->d_inode->i_zombie);
1157         return error;
1158 }
1159
1160 int lento_symlink(const char *oldname, const char *newname,
1161                   struct lento_vfs_context *info)
1162 {
1163         int error;
1164         char *from;
1165         char *to;
1166         struct dentry *dentry;
1167         struct presto_file_set *fset;
1168         struct nameidata nd;
1169
1170         ENTRY;
1171         lock_kernel();
1172         from = getname(oldname);
1173         error = PTR_ERR(from);
1174         if (IS_ERR(from)) {
1175                 EXIT;
1176                 goto exit;
1177         }
1178
1179         to = getname(newname);
1180         error = PTR_ERR(to);
1181         if (IS_ERR(to)) {
1182                 EXIT;
1183                 goto exit_from;
1184         }
1185
1186         error = path_lookup(to, LOOKUP_PARENT, &nd);
1187         if (error) {
1188                 EXIT;
1189                 goto exit_to;
1190         }
1191
1192         dentry = lookup_create(&nd, 0);
1193         error = PTR_ERR(dentry);
1194         if (IS_ERR(dentry)) {
1195                 path_release(&nd);
1196                 EXIT;
1197                 goto exit_to;
1198         }
1199
1200         fset = presto_fset(dentry);
1201         error = -EINVAL;
1202         if ( !fset ) {
1203                 CERROR("No fileset!\n");
1204                 path_release(&nd);
1205                 EXIT;
1206                 goto exit_lock;
1207         }
1208         error = presto_do_symlink(fset, nd.dentry,
1209                                   dentry, from, info);
1210         path_release(&nd);
1211         EXIT;
1212  exit_lock:
1213         up(&nd.dentry->d_inode->i_sem);
1214         dput(dentry);
1215  exit_to:
1216         putname(to);
1217  exit_from:
1218         putname(from);
1219  exit:
1220         unlock_kernel();
1221         return error;
1222 }
1223
1224 int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
1225                     struct dentry *dentry, int mode,
1226                     struct lento_vfs_context *info)
1227 {
1228         struct rec_info rec;
1229         int error;
1230         struct presto_version tgt_dir_ver, new_dir_ver;
1231         void *handle;
1232
1233         ENTRY;
1234         //        down(&dir->d_inode->i_zombie);
1235
1236         /* one journal record + directory block + room for removals*/ 
1237         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1238         if (error) { 
1239                 EXIT;
1240                 //                up(&dir->d_inode->i_zombie);
1241                 return error;
1242         }
1243
1244         error = may_create(dir->d_inode, dentry);
1245         if (error) {
1246                 EXIT;
1247                 goto exit_lock;
1248         }
1249
1250         error = -EPERM;
1251         if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) {
1252                 EXIT;
1253                 goto exit_lock;
1254         }
1255
1256         error = -ENOSPC;
1257         presto_getversion(&tgt_dir_ver, dir->d_inode);
1258         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR);
1259         if ( IS_ERR(handle) ) {
1260                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1261                 CERROR("presto_do_mkdir: no space for transaction\n");
1262                 goto exit_lock;
1263         }
1264
1265         DQUOT_INIT(dir->d_inode);
1266         mode &= (S_IRWXUGO|S_ISVTX);
1267         lock_kernel();
1268         error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode);
1269         if (error) {
1270                 EXIT;
1271                 goto exit;
1272         }
1273
1274         if ( dentry->d_inode && !error) {
1275                 struct presto_cache *cache = fset->fset_cache;
1276
1277                 presto_set_ops(dentry->d_inode, cache->cache_filter);
1278
1279                 filter_setup_dentry_ops(cache->cache_filter, 
1280                                         dentry->d_op, 
1281                                         &presto_dentry_ops);
1282                 dentry->d_op = filter_c2udops(cache->cache_filter);
1283                 /* if Lento does this, we won't have data */
1284                 if ( ISLENTO(presto_c2m(cache)) ) {
1285                         presto_set(dentry, PRESTO_ATTR);
1286                 } else {
1287                         presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
1288                 }
1289         }
1290
1291         info->flags |= LENTO_FL_TOUCH_PARENT;
1292         error = presto_settime(fset, NULL, dir, dentry,
1293                              info, ATTR_CTIME | ATTR_MTIME);
1294         if (error) { 
1295                 EXIT;
1296                 goto exit;
1297         }
1298
1299         presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10);
1300         presto_getversion(&new_dir_ver, dentry->d_inode);
1301         if ( presto_do_kml(info, dir) )
1302                 error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver,
1303                                              &new_dir_ver, 
1304                                              dentry->d_inode->i_mode);
1305
1306         presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20);
1307         if ( presto_do_rcvd(info, dentry) )
1308                 error = presto_write_last_rcvd(&rec, fset, info);
1309
1310         presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30);
1311         EXIT;
1312 exit:
1313         unlock_kernel();
1314         presto_trans_commit(fset, handle);
1315  exit_lock:
1316         presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
1317         //        up(&dir->d_inode->i_zombie);
1318         return error;
1319 }
1320
1321 /*
1322  * Look out: this function may change a normal dentry
1323  * into a directory dentry (different size)..
1324  */
1325 int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info)
1326 {
1327         int error;
1328         char *pathname;
1329         struct dentry *dentry;
1330         struct presto_file_set *fset;
1331         struct nameidata nd;
1332
1333         ENTRY;
1334         CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n",
1335                name, mode, info->slot_offset, info->recno, info->flags);
1336         pathname = getname(name);
1337         error = PTR_ERR(pathname);
1338         if (IS_ERR(pathname)) {
1339                 EXIT;
1340                 return error;
1341         }
1342
1343         error = path_lookup(pathname, LOOKUP_PARENT, &nd);
1344         if (error)
1345                 goto out_name;
1346
1347         dentry = lookup_create(&nd, 1);
1348         error = PTR_ERR(dentry);
1349         if (!IS_ERR(dentry)) {
1350                 fset = presto_fset(dentry);
1351                 error = -EINVAL;
1352                 if (!fset) {
1353                         CERROR("No fileset!\n");
1354                         EXIT;
1355                         goto out_dput;
1356                 }
1357
1358                 error = presto_do_mkdir(fset, nd.dentry, dentry, 
1359                                         mode & S_IALLUGO, info);
1360 out_dput:
1361                 dput(dentry);
1362         }
1363         up(&nd.dentry->d_inode->i_sem);
1364         path_release(&nd);
1365 out_name:
1366         EXIT;
1367         putname(pathname);
1368         CDEBUG(D_PIOCTL, "error: %d\n", error);
1369         return error;
1370 }
1371
1372 static void d_unhash(struct dentry *dentry)
1373 {
1374         dget(dentry);
1375         switch (atomic_read(&dentry->d_count)) {
1376         default:
1377                 shrink_dcache_parent(dentry);
1378                 if (atomic_read(&dentry->d_count) != 2)
1379                         break;
1380         case 2:
1381                 d_drop(dentry);
1382         }
1383 }
1384
1385 int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
1386                     struct dentry *dentry, struct lento_vfs_context *info)
1387 {
1388         struct rec_info rec;
1389         int error;
1390         struct presto_version tgt_dir_ver, old_dir_ver;
1391         struct izo_rollback_data rb;
1392         struct inode_operations *iops;
1393         void *handle;
1394         int do_kml, do_rcvd;
1395         int size;
1396
1397         ENTRY;
1398         error = may_delete(dir->d_inode, dentry, 1);
1399         if (error)
1400                 return error;
1401
1402         error = -EPERM;
1403         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
1404         if (!iops->rmdir) {
1405                 EXIT;
1406                 return error;
1407         }
1408
1409         size = PRESTO_REQHIGH - dentry->d_inode->i_size; 
1410         error = presto_reserve_space(fset->fset_cache, size); 
1411         if (error) { 
1412                 EXIT;
1413                 return error;
1414         }
1415
1416         presto_getversion(&tgt_dir_ver, dir->d_inode);
1417         presto_getversion(&old_dir_ver, dentry->d_inode);
1418         izo_get_rollback_data(dentry->d_inode, &rb);
1419         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR);
1420         if ( IS_ERR(handle) ) {
1421                 presto_release_space(fset->fset_cache, size); 
1422                 CERROR("ERROR: presto_do_rmdir: no space for transaction. Tell Peter.\n");
1423                 return -ENOSPC;
1424         }
1425
1426         DQUOT_INIT(dir->d_inode);
1427
1428         do_kml = presto_do_kml(info, dir);
1429         do_rcvd = presto_do_rcvd(info, dir);
1430
1431         //        double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
1432         d_unhash(dentry);
1433         if (IS_DEADDIR(dir->d_inode))
1434                 error = -ENOENT;
1435         else if (d_mountpoint(dentry)) {
1436                 CERROR("foo: d_mountpoint(dentry): ino %ld\n",
1437                        dentry->d_inode->i_ino);
1438                 error = -EBUSY;
1439         } else {
1440                 lock_kernel();
1441                 error = iops->rmdir(dir->d_inode, dentry);
1442                 unlock_kernel();
1443                 if (!error) {
1444                         dentry->d_inode->i_flags |= S_DEAD;
1445                         error = presto_settime(fset, NULL, NULL, dir, info,
1446                                                ATTR_CTIME | ATTR_MTIME);
1447                 }
1448         }
1449         //        double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
1450         if (!error)
1451                 d_delete(dentry);
1452         dput(dentry);
1453
1454         presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10);
1455         if ( !error && do_kml )
1456                 error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver,
1457                                              &old_dir_ver, &rb,
1458                                              dentry->d_name.len,
1459                                              dentry->d_name.name);
1460
1461         presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20);
1462         if ( !error && do_rcvd ) 
1463                 error = presto_write_last_rcvd(&rec, fset, info);
1464
1465         presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30);
1466         EXIT;
1467
1468         presto_trans_commit(fset, handle);
1469         presto_release_space(fset->fset_cache, size); 
1470         return error;
1471 }
1472
1473 int lento_rmdir(const char *pathname, struct lento_vfs_context *info)
1474 {
1475         int error = 0;
1476         char * name;
1477         struct dentry *dentry;
1478         struct presto_file_set *fset;
1479         struct nameidata nd;
1480
1481         ENTRY;
1482         name = getname(pathname);
1483         if(IS_ERR(name)) {
1484                 EXIT;
1485                 return PTR_ERR(name);
1486         }
1487
1488         error = path_lookup(name, LOOKUP_PARENT, &nd);
1489         if (error) {
1490                 EXIT;
1491                 goto exit;
1492         }
1493         switch(nd.last_type) {
1494         case LAST_DOTDOT:
1495                 error = -ENOTEMPTY;
1496                 EXIT;
1497                 goto exit1;
1498         case LAST_ROOT:
1499         case LAST_DOT:
1500                 error = -EBUSY;
1501                 EXIT;
1502                 goto exit1;
1503         }
1504         down(&nd.dentry->d_inode->i_sem);
1505         dentry = lookup_hash(&nd.last, nd.dentry);
1506         error = PTR_ERR(dentry);
1507         if (!IS_ERR(dentry)) {
1508                 fset = presto_fset(dentry);
1509                 error = -EINVAL;
1510                 if ( !fset ) {
1511                         CERROR("No fileset!\n");
1512                         EXIT;
1513                         goto exit_put;
1514                 }
1515                 error = presto_do_rmdir(fset, nd.dentry, dentry, info);
1516         exit_put:
1517                 dput(dentry);
1518         }
1519         up(&nd.dentry->d_inode->i_sem);
1520 exit1:
1521         path_release(&nd);
1522 exit:
1523         putname(name);
1524         EXIT;
1525         return error;
1526 }
1527
1528 int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
1529                     struct dentry *dentry, int mode, dev_t dev,
1530                     struct lento_vfs_context *info)
1531 {
1532         struct rec_info rec;
1533         int error = -EPERM;
1534         struct presto_version tgt_dir_ver, new_node_ver;
1535         struct inode_operations *iops;
1536         void *handle;
1537
1538         ENTRY;
1539
1540         //        down(&dir->d_inode->i_zombie);
1541         /* one KML entry */ 
1542         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
1543         if (error) {
1544                 EXIT;
1545                 //                up(&dir->d_inode->i_zombie);
1546                 return error;
1547         }
1548
1549         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
1550                 EXIT;
1551                 goto exit_lock;
1552         }
1553
1554         error = may_create(dir->d_inode, dentry);
1555         if (error) {
1556                 EXIT;
1557                 goto exit_lock;
1558         }
1559
1560         error = -EPERM;
1561         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
1562         if (!iops->mknod) {
1563                 EXIT;
1564                 goto exit_lock;
1565         }
1566
1567         DQUOT_INIT(dir->d_inode);
1568         lock_kernel();
1569         
1570         error = -ENOSPC;
1571         presto_getversion(&tgt_dir_ver, dir->d_inode);
1572         handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD);
1573         if ( IS_ERR(handle) ) {
1574                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
1575                 CERROR("presto_do_mknod: no space for transaction\n");
1576                 goto exit_lock2;
1577         }
1578
1579         error = iops->mknod(dir->d_inode, dentry, mode, dev);
1580         if (error) {
1581                 EXIT;
1582                 goto exit_commit;
1583         }
1584         if ( dentry->d_inode) {
1585                 struct presto_cache *cache = fset->fset_cache;
1586
1587                 presto_set_ops(dentry->d_inode, cache->cache_filter);
1588
1589                 filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, 
1590                                         &presto_dentry_ops);
1591                 dentry->d_op = filter_c2udops(cache->cache_filter);
1592
1593                 /* if Lento does this, we won't have data */
1594                 if ( ISLENTO(presto_c2m(cache)) ) {
1595                         presto_set(dentry, PRESTO_ATTR);
1596                 } else {
1597                         presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
1598                 }
1599         }
1600
1601         error = presto_settime(fset, NULL, NULL, dir,
1602                                info, ATTR_MTIME);
1603         if (error) { 
1604                 EXIT;
1605         }
1606         error = presto_settime(fset, NULL, NULL, dentry,
1607                                info, ATTR_CTIME | ATTR_MTIME);
1608         if (error) { 
1609                 EXIT;
1610         }
1611
1612         presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10);
1613         presto_getversion(&new_node_ver, dentry->d_inode);
1614         if ( presto_do_kml(info, dentry) )
1615                 error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver,
1616                                              &new_node_ver, 
1617                                              dentry->d_inode->i_mode,
1618                                              MAJOR(dev), MINOR(dev) );
1619
1620         presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20);
1621         if ( presto_do_rcvd(info, dentry) )
1622                 error = presto_write_last_rcvd(&rec, fset, info);
1623
1624         presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30);
1625         EXIT;
1626  exit_commit:
1627         presto_trans_commit(fset, handle);
1628  exit_lock2:
1629         unlock_kernel();
1630  exit_lock:
1631         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
1632         //        up(&dir->d_inode->i_zombie);
1633         return error;
1634 }
1635
1636 int lento_mknod(const char *filename, int mode, dev_t dev,
1637                 struct lento_vfs_context *info)
1638 {
1639         int error = 0;
1640         char * tmp;
1641         struct dentry * dentry;
1642         struct nameidata nd;
1643         struct presto_file_set *fset;
1644
1645         ENTRY;
1646
1647         if (S_ISDIR(mode))
1648                 return -EPERM;
1649         tmp = getname(filename);
1650         if (IS_ERR(tmp))
1651                 return PTR_ERR(tmp);
1652
1653         error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1654         if (error)
1655                 goto out;
1656         dentry = lookup_create(&nd, 0);
1657         error = PTR_ERR(dentry);
1658         if (!IS_ERR(dentry)) {
1659                 fset = presto_fset(dentry);
1660                 error = -EINVAL;
1661                 if ( !fset ) {
1662                         CERROR("No fileset!\n");
1663                         EXIT;
1664                         goto exit_put;
1665                 }
1666                 switch (mode & S_IFMT) {
1667                 case 0: case S_IFREG:
1668                         error = -EOPNOTSUPP;
1669                         break;
1670                 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1671                         error = presto_do_mknod(fset, nd.dentry, dentry, 
1672                                                 mode, dev, info);
1673                         break;
1674                 case S_IFDIR:
1675                         error = -EPERM;
1676                         break;
1677                 default:
1678                         error = -EINVAL;
1679                 }
1680         exit_put:
1681                 dput(dentry);
1682         }
1683         up(&nd.dentry->d_inode->i_sem);
1684         path_release(&nd);
1685 out:
1686         putname(tmp);
1687
1688         return error;
1689 }
1690
1691 int do_rename(struct presto_file_set *fset,
1692                      struct dentry *old_parent, struct dentry *old_dentry,
1693                      struct dentry *new_parent, struct dentry *new_dentry,
1694                      struct lento_vfs_context *info)
1695 {
1696         struct rec_info rec;
1697         int error;
1698         struct inode_operations *iops;
1699         struct presto_version src_dir_ver, tgt_dir_ver;
1700         void *handle;
1701         int new_inode_unlink = 0;
1702         struct inode *old_dir = old_parent->d_inode;
1703         struct inode *new_dir = new_parent->d_inode;
1704
1705         ENTRY;
1706         presto_getversion(&src_dir_ver, old_dir);
1707         presto_getversion(&tgt_dir_ver, new_dir);
1708
1709         error = -EPERM;
1710         iops = filter_c2cdiops(fset->fset_cache->cache_filter);
1711         if (!iops || !iops->rename) {
1712                 EXIT;
1713                 return error;
1714         }
1715
1716         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
1717         if (error) {
1718                 EXIT;
1719                 return error;
1720         }
1721         handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME);
1722         if ( IS_ERR(handle) ) {
1723                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
1724                 CERROR("presto_do_rename: no space for transaction\n");
1725                 return -ENOSPC;
1726         }
1727         if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) { 
1728                 dget(new_dentry); 
1729                 new_inode_unlink = 1;
1730         }
1731
1732         error = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
1733
1734         if (error) {
1735                 EXIT;
1736                 goto exit;
1737         }
1738
1739         if (new_inode_unlink) { 
1740                 error = presto_settime(fset, NULL, NULL, old_dentry,
1741                                        info, ATTR_CTIME);
1742                 dput(old_dentry); 
1743                 if (error) { 
1744                         EXIT;
1745                         goto exit;
1746                 }
1747         }
1748         info->flags |= LENTO_FL_TOUCH_PARENT;
1749         error = presto_settime(fset, NULL, new_parent, old_parent,
1750                                info, ATTR_CTIME | ATTR_MTIME);
1751         if (error) { 
1752                 EXIT;
1753                 goto exit;
1754         }
1755
1756         /* XXX make a distinction between cross file set
1757          * and intra file set renames here
1758          */
1759         presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10);
1760         if ( presto_do_kml(info, old_dentry) )
1761                 error = presto_journal_rename(&rec, fset, old_dentry,
1762                                               new_dentry,
1763                                               &src_dir_ver, &tgt_dir_ver);
1764
1765         presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20);
1766
1767         if ( presto_do_rcvd(info, old_dentry) )
1768                 error = presto_write_last_rcvd(&rec, fset, info);
1769
1770         presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30);
1771         EXIT;
1772 exit:
1773         presto_trans_commit(fset, handle);
1774         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
1775         return error;
1776 }
1777
1778 static
1779 int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent,
1780                       struct dentry *old_dentry, struct dentry *new_parent,
1781                       struct dentry *new_dentry, struct lento_vfs_context *info)
1782 {
1783         int error;
1784         struct inode *target;
1785         struct inode *old_dir = old_parent->d_inode;
1786         struct inode *new_dir = new_parent->d_inode;
1787
1788         if (old_dentry->d_inode == new_dentry->d_inode)
1789                 return 0;
1790
1791         error = may_delete(old_dir, old_dentry, 1);
1792         if (error)
1793                 return error;
1794
1795         if (new_dir->i_sb != old_dir->i_sb)
1796                 return -EXDEV;
1797
1798         if (!new_dentry->d_inode)
1799                 error = may_create(new_dir, new_dentry);
1800         else
1801                 error = may_delete(new_dir, new_dentry, 1);
1802         if (error)
1803                 return error;
1804
1805         if (!old_dir->i_op || !old_dir->i_op->rename)
1806                 return -EPERM;
1807
1808         /*
1809          * If we are going to change the parent - check write permissions,
1810          * we'll need to flip '..'.
1811          */
1812         if (new_dir != old_dir) {
1813                 error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
1814         }
1815         if (error)
1816                 return error;
1817
1818         DQUOT_INIT(old_dir);
1819         DQUOT_INIT(new_dir);
1820         down(&old_dir->i_sb->s_vfs_rename_sem);
1821         error = -EINVAL;
1822         if (is_subdir(new_dentry, old_dentry))
1823                 goto out_unlock;
1824         target = new_dentry->d_inode;
1825         if (target) { /* Hastur! Hastur! Hastur! */
1826                 //                triple_down(&old_dir->i_zombie,
1827                 //                            &new_dir->i_zombie,
1828                 //                            &target->i_zombie);
1829                 d_unhash(new_dentry);
1830         } else
1831                 //                double_down(&old_dir->i_zombie,
1832                 //                            &new_dir->i_zombie);
1833         if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
1834                 error = -ENOENT;
1835         else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1836                 error = -EBUSY;
1837         else 
1838                 error = do_rename(fset, old_parent, old_dentry,
1839                                          new_parent, new_dentry, info);
1840         if (target) {
1841                 if (!error)
1842                         target->i_flags |= S_DEAD;
1843                 //                triple_up(&old_dir->i_zombie,
1844                 //                          &new_dir->i_zombie,
1845                 //                          &target->i_zombie);
1846                 if (d_unhashed(new_dentry))
1847                         d_rehash(new_dentry);
1848                 dput(new_dentry);
1849         } else
1850                 //                double_up(&old_dir->i_zombie,
1851                 //                          &new_dir->i_zombie);
1852                 
1853         if (!error)
1854                 d_move(old_dentry,new_dentry);
1855 out_unlock:
1856         up(&old_dir->i_sb->s_vfs_rename_sem);
1857         return error;
1858 }
1859
1860 static
1861 int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent,
1862                         struct dentry *old_dentry, struct dentry *new_parent,
1863                         struct dentry *new_dentry, struct lento_vfs_context *info)
1864 {
1865         struct inode *old_dir = old_parent->d_inode;
1866         struct inode *new_dir = new_parent->d_inode;
1867         int error;
1868
1869         if (old_dentry->d_inode == new_dentry->d_inode)
1870                 return 0;
1871
1872         error = may_delete(old_dir, old_dentry, 0);
1873         if (error)
1874                 return error;
1875
1876         if (new_dir->i_sb != old_dir->i_sb)
1877                 return -EXDEV;
1878
1879         if (!new_dentry->d_inode)
1880                 error = may_create(new_dir, new_dentry);
1881         else
1882                 error = may_delete(new_dir, new_dentry, 0);
1883         if (error)
1884                 return error;
1885
1886         if (!old_dir->i_op || !old_dir->i_op->rename)
1887                 return -EPERM;
1888
1889         DQUOT_INIT(old_dir);
1890         DQUOT_INIT(new_dir);
1891         //        double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1892         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1893                 error = -EBUSY;
1894         else
1895                 error = do_rename(fset, old_parent, old_dentry,
1896                                   new_parent, new_dentry, info);
1897         //        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1898         if (error)
1899                 return error;
1900         /* The following d_move() should become unconditional */
1901         if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1902                 d_move(old_dentry, new_dentry);
1903         }
1904         return 0;
1905 }
1906
1907 int presto_do_rename(struct presto_file_set *fset, 
1908               struct dentry *old_parent, struct dentry *old_dentry,
1909               struct dentry *new_parent, struct dentry *new_dentry,
1910               struct lento_vfs_context *info)
1911 {
1912         if (S_ISDIR(old_dentry->d_inode->i_mode))
1913                 return presto_rename_dir(fset, old_parent,old_dentry,new_parent,
1914                                       new_dentry, info);
1915         else
1916                 return presto_rename_other(fset, old_parent, old_dentry,
1917                                            new_parent,new_dentry, info);
1918 }
1919
1920
1921 int lento_do_rename(const char *oldname, const char *newname,
1922                  struct lento_vfs_context *info)
1923 {
1924         int error = 0;
1925         struct dentry * old_dir, * new_dir;
1926         struct dentry * old_dentry, *new_dentry;
1927         struct nameidata oldnd, newnd;
1928         struct presto_file_set *fset;
1929
1930         ENTRY;
1931
1932         error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1933         if (error)
1934                 goto exit;
1935
1936         error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1937         if (error)
1938                 goto exit1;
1939
1940         error = -EXDEV;
1941         if (oldnd.mnt != newnd.mnt)
1942                 goto exit2;
1943
1944         old_dir = oldnd.dentry;
1945         error = -EBUSY;
1946         if (oldnd.last_type != LAST_NORM)
1947                 goto exit2;
1948
1949         new_dir = newnd.dentry;
1950         if (newnd.last_type != LAST_NORM)
1951                 goto exit2;
1952
1953         lock_rename(new_dir, old_dir);
1954
1955         old_dentry = lookup_hash(&oldnd.last, old_dir);
1956         error = PTR_ERR(old_dentry);
1957         if (IS_ERR(old_dentry))
1958                 goto exit3;
1959         /* source must exist */
1960         error = -ENOENT;
1961         if (!old_dentry->d_inode)
1962                 goto exit4;
1963         fset = presto_fset(old_dentry);
1964         error = -EINVAL;
1965         if ( !fset ) {
1966                 CERROR("No fileset!\n");
1967                 EXIT;
1968                 goto exit4;
1969         }
1970         /* unless the source is a directory trailing slashes give -ENOTDIR */
1971         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1972                 error = -ENOTDIR;
1973                 if (oldnd.last.name[oldnd.last.len])
1974                         goto exit4;
1975                 if (newnd.last.name[newnd.last.len])
1976                         goto exit4;
1977         }
1978         new_dentry = lookup_hash(&newnd.last, new_dir);
1979         error = PTR_ERR(new_dentry);
1980         if (IS_ERR(new_dentry))
1981                 goto exit4;
1982
1983         lock_kernel();
1984         error = presto_do_rename(fset, old_dir, old_dentry,
1985                                    new_dir, new_dentry, info);
1986         unlock_kernel();
1987
1988         dput(new_dentry);
1989 exit4:
1990         dput(old_dentry);
1991 exit3:
1992         unlock_rename(new_dir, old_dir);
1993 exit2:
1994         path_release(&newnd);
1995 exit1:
1996         path_release(&oldnd);
1997 exit:
1998         return error;
1999 }
2000
/*
 * lento_rename - rename entry point taking the two path names.
 *
 * Both names are fetched with getname(), handed to lento_do_rename() and
 * released again with putname().  Returns the getname() error if either
 * name cannot be fetched, otherwise the result of lento_do_rename().
 */
int  lento_rename(const char * oldname, const char * newname,
                  struct lento_vfs_context *info)
{
        char *src_path;
        char *dst_path;
        int rc;

        src_path = getname(oldname);
        if (IS_ERR(src_path))
                return PTR_ERR(src_path);

        dst_path = getname(newname);
        if (IS_ERR(dst_path)) {
                rc = PTR_ERR(dst_path);
                goto put_src;
        }

        rc = lento_do_rename(src_path, dst_path, info);
        putname(dst_path);
put_src:
        putname(src_path);
        return rc;
}
2020
2021 struct dentry *presto_iopen(struct dentry *dentry,
2022                             ino_t ino, unsigned int generation)
2023 {
2024         struct presto_file_set *fset;
2025         char name[48];
2026         int error;
2027
2028         ENTRY;
2029         /* see if we already have the dentry we want */
2030         if (dentry->d_inode && dentry->d_inode->i_ino == ino &&
2031             dentry->d_inode->i_generation == generation) {
2032                 EXIT;
2033                 return dentry;
2034         }
2035
2036         /* Make sure we have a cache beneath us.  We should always find at
2037          * least one dentry inside the cache (if it exists), otherwise not
2038          * even the cache root exists, or we passed in a bad name.
2039          */
2040         fset = presto_fset(dentry);
2041         error = -EINVAL;
2042         if (!fset) {
2043                 CERROR("No fileset for %*s!\n",
2044                        dentry->d_name.len, dentry->d_name.name);
2045                 EXIT;
2046                 dput(dentry);
2047                 return ERR_PTR(error);
2048         }
2049         dput(dentry);
2050
2051         sprintf(name, "%s%#lx%c%#x",
2052                 PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation);
2053         CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name);
2054         return lookup_one_len(name, fset->fset_dentry, strlen(name));
2055 }
2056
2057 static struct file *presto_filp_dopen(struct dentry *dentry, int flags)
2058 {
2059         struct file *f;
2060         struct inode *inode;
2061         int flag, error;
2062
2063         ENTRY;
2064         error = -ENFILE;
2065         f = get_empty_filp();
2066         if (!f) {
2067                 CDEBUG(D_PIOCTL, "error getting file pointer\n");
2068                 EXIT;
2069                 goto out;
2070         }
2071         f->f_flags = flag = flags;
2072         f->f_mode = (flag+1) & O_ACCMODE;
2073         inode = dentry->d_inode;
2074         if (f->f_mode & FMODE_WRITE) {
2075                 error = get_write_access(inode);
2076                 if (error) {
2077                         CDEBUG(D_PIOCTL, "error getting write access\n");
2078                         EXIT;                        goto cleanup_file;
2079                 }
2080         }
2081
2082         /* XXX: where the fuck is ->f_vfsmnt? */
2083         f->f_dentry = dentry;
2084         f->f_mapping = dentry->d_inode->i_mapping;
2085         f->f_pos = 0;
2086         //f->f_reada = 0;
2087         f->f_op = NULL;
2088         if (inode->i_op)
2089                 /* XXX should we set to presto ops, or leave at cache ops? */
2090                 f->f_op = inode->i_fop;
2091         if (f->f_op && f->f_op->open) {
2092                 error = f->f_op->open(inode, f);
2093                 if (error) {
2094                         CDEBUG(D_PIOCTL, "error calling cache 'open'\n");
2095                         EXIT;
2096                         goto cleanup_all;
2097                 }
2098         }
2099         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
2100
2101         return f;
2102
2103 cleanup_all:
2104         if (f->f_mode & FMODE_WRITE)
2105                 put_write_access(inode);
2106 cleanup_file:
2107         put_filp(f);
2108 out:
2109         return ERR_PTR(error);
2110 }
2111
2112
2113 /* Open an inode by number.  We pass in the cache root name (or a subdirectory
2114  * from the cache that is guaranteed to exist) to be able to access the cache.
2115  */
2116 int lento_iopen(const char *name, ino_t ino, unsigned int generation,
2117                 int flags)
2118 {
2119         char * tmp;
2120         struct dentry *dentry;
2121         struct nameidata nd;
2122         int fd;
2123         int error;
2124
2125         ENTRY;
2126         CDEBUG(D_PIOCTL,
2127                "open %s:inode %#lx (%ld), generation %x (%d), flags %d \n",
2128                name, ino, ino, generation, generation, flags);
2129         /* We don't allow creation of files by number only, as it would
2130          * lead to a dangling files not in any directory.  We could also
2131          * just turn off the flag and ignore it.
2132          */
2133         if (flags & O_CREAT) {
2134                 CERROR("%s: create file by inode number (%ld) not allowed\n",
2135                        __FUNCTION__, ino);
2136                 EXIT;
2137                 return -EACCES;
2138         }
2139
2140         tmp = getname(name);
2141         if (IS_ERR(tmp)) {
2142                 EXIT;
2143                 return PTR_ERR(tmp);
2144         }
2145
2146         lock_kernel();
2147 again:  /* look the named file or a parent directory so we can get the cache */
2148         error = presto_walk(tmp, &nd);
2149         if ( error && error != -ENOENT ) {
2150                 EXIT;
2151                 unlock_kernel();
2152                 putname(tmp);
2153                 return error;
2154         } 
2155         if (error == -ENOENT)
2156                 dentry = NULL;
2157         else 
2158                 dentry = nd.dentry;
2159
2160         /* we didn't find the named file, so see if a parent exists */
2161         if (!dentry) {
2162                 char *slash;
2163
2164                 slash = strrchr(tmp, '/');
2165                 if (slash && slash != tmp) {
2166                         *slash = '\0';
2167                         path_release(&nd);
2168                         goto again;
2169                 }
2170                 /* we should never get here... */
2171                 CDEBUG(D_PIOCTL, "no more path components to try!\n");
2172                 fd = -ENOENT;
2173                 goto exit;
2174         }
2175         CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry);
2176
2177         dentry = presto_iopen(dentry, ino, generation);
2178         fd = PTR_ERR(dentry);
2179         if (IS_ERR(dentry)) {
2180                 EXIT;
2181                 goto exit;
2182         }
2183
2184         /* XXX start of code that might be replaced by something like:
2185          * if (flags & (O_WRONLY | O_RDWR)) {
2186          *      error = get_write_access(dentry->d_inode);
2187          *      if (error) {
2188          *              EXIT;
2189          *              goto cleanup_dput;
2190          *      }
2191          * }
2192          * fd = open_dentry(dentry, flags);
2193          *
2194          * including the presto_filp_dopen() function (check dget counts!)
2195          */
2196         fd = get_unused_fd();
2197         if (fd < 0) {
2198                 EXIT;
2199                 goto exit;
2200         }
2201
2202         {
2203                 int error;
2204                 struct file * f = presto_filp_dopen(dentry, flags);
2205                 error = PTR_ERR(f);
2206                 if (IS_ERR(f)) {
2207                         put_unused_fd(fd);
2208                         fd = error;
2209                 } else {
2210                         fd_install(fd, f);
2211                 }
2212         }
2213         /* end of code that might be replaced by open_dentry */
2214
2215         EXIT;
2216 exit:
2217         unlock_kernel();
2218         path_release(&nd);
2219         putname(tmp);
2220         return fd;
2221 }
2222
2223 #ifdef CONFIG_FS_EXT_ATTR
2224
2225 #if 0 /* was a broken check for Posix ACLs */
2226 /* Posix ACL code changes i_mode without using a notify_change (or
2227  * a mark_inode_dirty!). We need to duplicate this at the reintegrator
2228  * which is done by this function. This function also takes care of 
2229  * resetting the cached posix acls in this inode. If we don't reset these
2230  * VFS continues using the old acl information, which by now may be out of
2231  * date.
2232  */
2233 int presto_setmode(struct presto_file_set *fset, struct dentry *dentry,
2234                    mode_t mode)
2235 {
2236         struct inode *inode = dentry->d_inode;
2237
2238         ENTRY;
2239         /* The extended attributes for this inode were modified. 
2240          * At this point we can not be sure if any of the ACL 
2241          * information for this inode was updated. So we will 
2242          * force VFS to reread the acls. Note that we do this 
2243          * only when called from the SETEXTATTR ioctl, which is why we
2244          * do this while setting the mode of the file. Also note
2245          * that mark_inode_dirty is not be needed for i_*acl only
2246          * to force i_mode info to disk, and should be removed once
2247          * we use notify_change to update the mode.
2248          * XXX: is mode setting really needed? Just setting acl's should
2249          * be enough! VFS should change the i_mode as needed? SHP
2250          */
2251         if (inode->i_acl && 
2252             inode->i_acl != POSIX_ACL_NOT_CACHED) 
2253             posix_acl_release(inode->i_acl);
2254         if (inode->i_default_acl && 
2255             inode->i_default_acl != POSIX_ACL_NOT_CACHED) 
2256             posix_acl_release(inode->i_default_acl);
2257         inode->i_acl = POSIX_ACL_NOT_CACHED;
2258         inode->i_default_acl = POSIX_ACL_NOT_CACHED;
2259         inode->i_mode = mode;
2260         /* inode should already be dirty...but just in case */
2261         mark_inode_dirty(inode);
2262         return 0;
2263
2264 #if 0
2265         /* XXX: The following code is the preferred way to set mode, 
2266          * however, I need to carefully go through possible recursion
2267          * paths back into presto. See comments in presto_do_setattr.
2268          */
2269         {    
2270         int error=0; 
2271         struct super_operations *sops;
2272         struct iattr iattr;
2273
2274         iattr.ia_mode = mode;
2275         iattr.ia_valid = ATTR_MODE|ATTR_FORCE;
2276
2277         error = -EPERM;
2278         sops = filter_c2csops(fset->fset_cache->cache_filter); 
2279         if (!sops &&
2280             !sops->notify_change) {
2281                 EXIT;
2282                 return error;
2283         }
2284
2285         error = sops->notify_change(dentry, &iattr);
2286
2287         EXIT;
2288         return error;
2289         }
2290 #endif
2291 }
2292 #endif
2293
2294 /* setextattr Interface to cache filesystem */
2295 int presto_do_set_ext_attr(struct presto_file_set *fset, 
2296                            struct dentry *dentry, 
2297                            const char *name, void *buffer,
2298                            size_t buffer_len, int flags, mode_t *mode,
2299                            struct lento_vfs_context *info) 
2300 {
2301         struct rec_info rec;
2302         struct inode *inode = dentry->d_inode;
2303         struct inode_operations *iops;
2304         int error;
2305         struct presto_version ver;
2306         void *handle;
2307         char temp[PRESTO_EXT_ATTR_NAME_MAX+1];
2308
2309         ENTRY;
2310         error = -EROFS;
2311         if (IS_RDONLY(inode)) {
2312                 EXIT;
2313                 return -EROFS;
2314         }
2315
2316         if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
2317                 EXIT;
2318                 return -EPERM;
2319         }
2320
2321         presto_getversion(&ver, inode);
2322         error = -EPERM;
2323         /* We need to invoke different filters based on whether
2324          * this dentry is a regular file, directory or symlink.
2325          */
2326         switch (inode->i_mode & S_IFMT) {
2327                 case S_IFLNK: /* symlink */
2328                     iops = filter_c2csiops(fset->fset_cache->cache_filter); 
2329                     break;
2330                 case S_IFDIR: /* directory */
2331                     iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
2332                     break;
2333                 case S_IFREG:
2334                 default: /* everything else including regular files */
2335                     iops = filter_c2cfiops(fset->fset_cache->cache_filter); 
2336         }
2337
2338         if (!iops && !iops->set_ext_attr) {
2339                 EXIT;
2340                 return error;
2341         }
2342
2343         error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
2344         if (error) {
2345                 EXIT;
2346                 return error;
2347         }
2348
2349         
2350         handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR);
2351         if ( IS_ERR(handle) ) {
2352                 CERROR("presto_do_set_ext_attr: no space for transaction\n");
2353                 presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
2354                 return -ENOSPC;
2355         }
2356
2357         /* We first "truncate" name to the maximum allowable in presto */
2358         /* This simulates the strncpy_from_use code in fs/ext_attr.c */
2359         strlcpy(temp,name,sizeof(temp));
2360
2361         /* Pass down to cache*/
2362         error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags);
2363         if (error) {
2364                 EXIT;
2365                 goto exit;
2366         }
2367
2368 #if 0 /* was a broken check for Posix ACLs */
2369         /* Reset mode if specified*/
2370         /* XXX: when we do native acl support, move this code out! */
2371         if (mode != NULL) {
2372                 error = presto_setmode(fset, dentry, *mode);
2373                 if (error) { 
2374                     EXIT;
2375                     goto exit;
2376                 }
2377         }
2378 #endif
2379
2380         /* Reset ctime. Only inode change time (ctime) is affected */
2381         error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME);
2382         if (error) { 
2383                 EXIT;
2384                 goto exit;
2385         }
2386
2387         if (flags & EXT_ATTR_FLAG_USER) {
2388                 CERROR(" USER flag passed to presto_do_set_ext_attr!\n");
2389                 BUG();
2390         }
2391
2392         /* We are here, so set_ext_attr succeeded. We no longer need to keep
2393          * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force
2394          * the attribute value during log replay. -SHP
2395          */
2396         flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE);
2397
2398         presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10);
2399         if ( presto_do_kml(info, dentry) )
2400                 error = presto_journal_set_ext_attr
2401                         (&rec, fset, dentry, &ver, name, buffer, 
2402                          buffer_len, flags);
2403
2404         presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20);
2405         if ( presto_do_rcvd(info, dentry) )
2406                 error = presto_write_last_rcvd(&rec, fset, info);
2407
2408         presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30);
2409         EXIT;
2410 exit:
2411         presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
2412         presto_trans_commit(fset, handle);
2413
2414         return error;
2415 }
2416 #endif