This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / fs / intermezzo / journal.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 1998 Peter J. Braam
5  *  Copyright (C) 2001 Cluster File Systems, Inc. 
6  *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
7  *
8  *  Support for journalling extended attributes
9  *  Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
10  * 
11  *   This file is part of InterMezzo, http://www.inter-mezzo.org.
12  *
13  *   InterMezzo is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   InterMezzo is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with InterMezzo; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include <linux/types.h>
28 #include <linux/kernel.h>
29 #include <linux/sched.h>
30 #include <linux/fs.h>
31 #include <linux/namei.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/time.h>
35 #include <linux/errno.h>
36 #include <asm/segment.h>
37 #include <asm/uaccess.h>
38 #include <linux/string.h>
39
40 #include "intermezzo_fs.h"
41 #include "intermezzo_psdev.h"
42
43 struct presto_reservation_data {
44         unsigned int ri_recno;
45         loff_t ri_offset;
46         loff_t ri_size;
47         struct list_head ri_list;
48 };
49
50 /* 
51  *  Locking Semantics
52  * 
53  * write lock in struct presto_log_fd: 
54  *  - name: fd_lock
55  *  - required for: accessing any field in a presto_log_fd 
56  *  - may not be held across I/O
57  *  - 
58  *  
59  */
60
61 /*
62  *  reserve record space and/or atomically request state of the log
63  *  rec will hold the location reserved record upon return
64  *  this reservation will be placed in the queue
65  */ 
66 static void presto_reserve_record(struct presto_file_set *fset, 
67                            struct presto_log_fd *fd, 
68                            struct rec_info *rec,
69                            struct presto_reservation_data *rd)
70 {
71         int chunked_record = 0; 
72         ENTRY;
73         
74         write_lock(&fd->fd_lock);
75         if ( rec->is_kml ) { 
76                 int chunk = 1 << fset->fset_chunkbits;
77                 int chunk_mask = ~(chunk -1); 
78                 loff_t boundary; 
79
80                 boundary =  (fd->fd_offset + chunk - 1) & chunk_mask;
81                 if ( fd->fd_offset + rec->size >= boundary ) {
82                         chunked_record = 1;
83                         fd->fd_offset = boundary; 
84                 }
85         }
86
87         fd->fd_recno++;
88         
89         /* this moves the fd_offset back after truncation */ 
90         if ( list_empty(&fd->fd_reservations) && 
91              !chunked_record) { 
92                 fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size;
93         }
94
95         rec->offset = fd->fd_offset;
96         if (rec->is_kml)
97                 rec->offset += fset->fset_kml_logical_off;
98
99         rec->recno = fd->fd_recno;
100
101         /* add the reservation data to the end of the list */
102         rd->ri_offset = fd->fd_offset;
103         rd->ri_size = rec->size;
104         rd->ri_recno = rec->recno; 
105         list_add(&rd->ri_list, fd->fd_reservations.prev);
106
107         fd->fd_offset += rec->size;
108
109         write_unlock(&fd->fd_lock); 
110
111         EXIT;
112 }
113
114 static inline void presto_release_record(struct presto_log_fd *fd,
115                                          struct presto_reservation_data *rd)
116 {
117         write_lock(&fd->fd_lock);
118         list_del(&rd->ri_list);
119         write_unlock(&fd->fd_lock);
120 }
121
122 /* XXX should we ask for do_truncate to be exported? */
123 int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
124                     loff_t length,  loff_t size_check)
125 {
126         struct inode *inode = dentry->d_inode;
127         int error;
128         struct iattr newattrs;
129
130         ENTRY;
131
132         if (length < 0) {
133                 EXIT;
134                 return -EINVAL;
135         }
136
137         down(&inode->i_sem);
138         lock_kernel();
139         
140         if (size_check != inode->i_size) { 
141                 unlock_kernel();
142                 up(&inode->i_sem);
143                 EXIT;
144                 return -EALREADY; 
145         }
146
147         newattrs.ia_size = length;
148         newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
149
150         if (inode->i_op && inode->i_op->setattr)
151                 error = inode->i_op->setattr(dentry, &newattrs);
152         else {
153                 inode_setattr(dentry->d_inode, &newattrs);
154                 error = 0;
155         }
156
157         unlock_kernel();
158         up(&inode->i_sem);
159         EXIT;
160         return error;
161 }
162
163 static void presto_kml_truncate(struct presto_file_set *fset)
164 {
165         int rc;
166         ENTRY;
167
168         write_lock(&fset->fset_kml.fd_lock);
169         if (fset->fset_kml.fd_truncating == 1 ) {
170                 write_unlock(&fset->fset_kml.fd_lock);
171                 EXIT;
172                 return;
173         }
174
175         fset->fset_kml.fd_truncating = 1;
176         write_unlock(&fset->fset_kml.fd_lock);
177
178         CERROR("islento: %d, count: %d\n",
179                ISLENTO(presto_i2m(fset->fset_dentry->d_inode)),
180                fset->fset_permit_count);
181
182         rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor,
183                                 fset->fset_lento_off, fset->fset_lento_recno,
184                                 fset->fset_name);
185
186         /* Userspace is the only permitholder now, and will retain an exclusive
187          * hold on the permit until KML truncation completes. */
188         /* FIXME: double check this code path now that the precise semantics of
189          * fset->fset_permit_count have changed. */
190
191         if (rc != 0) {
192                 write_lock(&fset->fset_kml.fd_lock);
193                 fset->fset_kml.fd_truncating = 0;
194                 write_unlock(&fset->fset_kml.fd_lock);
195         }
196
197         EXIT;
198 }
199
200 void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
201                          int op)
202 {
203         ENTRY;
204         if ( !fset->fset_cache->cache_filter->o_trops ) {
205                 EXIT;
206                 return NULL;
207         }
208         EXIT;
209         return fset->fset_cache->cache_filter->o_trops->tr_start
210                 (fset, inode, op);
211 }
212
213 void presto_trans_commit(struct presto_file_set *fset, void *handle)
214 {
215         ENTRY;
216         if (!fset->fset_cache->cache_filter->o_trops ) {
217                 EXIT;
218                 return;
219         }
220
221         fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle);
222
223         /* Check to see if the KML needs truncated. */
224         if (fset->kml_truncate_size > 0 &&
225             !fset->fset_kml.fd_truncating &&
226             fset->fset_kml.fd_offset > fset->kml_truncate_size) {
227                 CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n",
228                        (unsigned long)fset->fset_kml.fd_offset);
229                 presto_kml_truncate(fset);
230         }
231         EXIT;
232 }
233
234 inline int presto_no_journal(struct presto_file_set *fset)
235 {
236         int minor = fset->fset_cache->cache_psdev->uc_minor;
237         return izo_channels[minor].uc_no_journal;
238 }
239
/* Round x up to the next multiple of 4. */
#define size_round(x)  (((x)+3) & ~0x3)

/* Release a PAGE_SIZE scratch buffer. */
#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE)

/*
 * Allocate a PAGE_SIZE scratch buffer into newbuf.  If the allocation
 * fails, oldbuf is released (when non-NULL) and the *enclosing function*
 * returns -ENOMEM, so this may only be used inside functions returning
 * int.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else; invoke it
 * with a trailing semicolon.
 */
#define BUFF_ALLOC(newbuf, oldbuf)                      \
        do {                                            \
                PRESTO_ALLOC(newbuf, PAGE_SIZE);        \
                if ( !(newbuf) ) {                      \
                        if ((oldbuf))                   \
                                BUFF_FREE(oldbuf);      \
                        return -ENOMEM;                 \
                }                                       \
        } while (0)
250
251 /*
252  * "buflen" should be PAGE_SIZE or more.
253  * Give relative path wrt to a fsetroot
254  */
255 char * presto_path(struct dentry *dentry, struct dentry *root,
256                    char *buffer, int buflen)
257 {
258         char * end = buffer+buflen;
259         char * retval;
260
261         *--end = '\0';
262         buflen--;
263         if (dentry->d_parent != dentry && d_unhashed(dentry)) {
264                 buflen -= 10;
265                 end -= 10;
266                 memcpy(end, " (deleted)", 10);
267         }
268
269         /* Get '/' right */
270         retval = end-1;
271         *retval = '/';
272
273         for (;;) {
274                 struct dentry * parent;
275                 int namelen;
276
277                 if (dentry == root)
278                         break;
279                 parent = dentry->d_parent;
280                 if (dentry == parent)
281                         break;
282                 namelen = dentry->d_name.len;
283                 buflen -= namelen + 1;
284                 if (buflen < 0)
285                         break;
286                 end -= namelen;
287                 memcpy(end, dentry->d_name.name, namelen);
288                 *--end = '/';
289                 retval = end;
290                 dentry = parent;
291         }
292         return retval;
293 }
294
/*
 * Copy @size bytes from @value to @buf and return the position in @buf
 * just past the copied bytes, ready for the next field.
 */
static inline char *logit(char *buf, const void *value, int size)
{
        memcpy(buf, value, size);
        return buf + size;
}
303
304
305 static inline char *
306 journal_log_prefix_with_groups_and_ids(char *buf, int opcode, 
307                                        struct rec_info *rec,
308                                        __u32 ngroups, gid_t *groups,
309                                        __u32 fsuid, __u32 fsgid)
310 {
311         struct kml_prefix_hdr p;
312         u32 loggroups[NGROUPS_SMALL];
313
314         int i; 
315
316         p.len = cpu_to_le32(rec->size);
317         p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION;
318         p.pid = cpu_to_le32(current->pid);
319         p.auid = cpu_to_le32(current->uid);
320         p.fsuid = cpu_to_le32(fsuid);
321         p.fsgid = cpu_to_le32(fsgid);
322         p.ngroups = cpu_to_le32(ngroups);
323         p.opcode = cpu_to_le32(opcode);
324         for (i=0 ; i < ngroups ; i++)
325                 loggroups[i] = cpu_to_le32((__u32) groups[i]);
326
327         buf = logit(buf, &p, sizeof(struct kml_prefix_hdr));
328         buf = logit(buf, &loggroups, sizeof(__u32) * ngroups);
329         return buf;
330 }
331
332 static inline char *
333 journal_log_prefix(char *buf, int opcode, struct rec_info *rec)
334 {
335         __u32 groups[NGROUPS_SMALL]; 
336         int i; 
337
338         /* convert 16 bit gid's to 32 bit gid's */
339         for (i=0; i<current->group_info->ngroups; i++) 
340                 groups[i] = GROUP_AT(current->group_info,i);
341         
342         return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
343                                                       (__u32)current->group_info->ngroups,
344                                                       groups,
345                                                       (__u32)current->fsuid,
346                                                       (__u32)current->fsgid);
347 }
348
349 static inline char *
350 journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec, 
351                                __u32 ngroups, gid_t *groups)
352 {
353         return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
354                                                       ngroups, groups,
355                                                       (__u32)current->fsuid,
356                                                       (__u32)current->fsgid);
357 }
358
359 static inline char *log_dentry_version(char *buf, struct dentry *dentry)
360 {
361         struct presto_version version;
362
363         presto_getversion(&version, dentry->d_inode);
364         
365         version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
366         version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
367         version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
368         version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
369         version.pv_size = HTON__u64(version.pv_size);
370
371         return logit(buf, &version, sizeof(version));
372 }
373
374 static inline char *log_version(char *buf, struct presto_version *pv)
375 {
376         struct presto_version version;
377
378         memcpy(&version, pv, sizeof(version));
379         
380         version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
381         version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
382         version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
383         version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
384         version.pv_size = HTON__u64(version.pv_size);
385
386         return logit(buf, &version, sizeof(version));
387 }
388
389 static inline char *log_rollback(char *buf, struct izo_rollback_data *rb)
390 {
391         struct izo_rollback_data rollback;
392         
393         rollback.rb_mode = HTON__u32(rb->rb_mode);
394         rollback.rb_rdev = HTON__u32(rb->rb_rdev);
395         rollback.rb_uid = HTON__u64(rb->rb_uid);
396         rollback.rb_gid = HTON__u64(rb->rb_gid);
397
398         return logit(buf, &rollback, sizeof(rollback));
399 }
400
401 static inline char *journal_log_suffix(char *buf, char *log,
402                                        struct presto_file_set *fset,
403                                        struct dentry *dentry,
404                                        struct rec_info *rec)
405 {
406         struct kml_suffix s;
407         struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log;
408
409 #if 0
410         /* XXX needs to be done after reservation, 
411            disable ths until version 1.2 */
412         if ( dentry ) { 
413                 s.prevrec = cpu_to_le32(rec->offset - 
414                                         presto_d2d(dentry)->dd_kml_offset);
415                 presto_d2d(dentry)->dd_kml_offset = rec->offset;
416         } else { 
417                 s.prevrec = -1;
418         }
419 #endif
420         s.prevrec = 0; 
421
422         /* record number needs to be filled in after reservation 
423            s.recno = cpu_to_le32(rec->recno); */ 
424         s.time = cpu_to_le32(get_seconds());
425         s.len = p->len;
426         return logit(buf, &s, sizeof(s));
427 }
428
429 int izo_log_close(struct presto_log_fd *logfd)
430 {
431         int rc = 0;
432
433         if (logfd->fd_file) {
434                 rc = filp_close(logfd->fd_file, 0);
435                 logfd->fd_file = NULL;
436         } else
437                 CERROR("InterMezzo: %s: no filp\n", __FUNCTION__);
438         if (rc != 0)
439                 CERROR("InterMezzo: close files: filp won't close: %d\n", rc);
440
441         return rc;
442 }
443
444 int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
445 {
446         int rc;
447         mm_segment_t old_fs;
448         ENTRY;
449
450         rc = -EINVAL;
451         if ( !off ) {
452                 EXIT;
453                 return rc;
454         }
455
456         if ( ! file ) {
457                 EXIT;
458                 return rc;
459         }
460
461         if ( ! file->f_op ) {
462                 EXIT;
463                 return rc;
464         }
465
466         if ( ! file->f_op->write ) {
467                 EXIT;
468                 return rc;
469         }
470
471         old_fs = get_fs();
472         set_fs(get_ds());
473         rc = file->f_op->write(file, str, len, off);
474         if (rc != len) {
475                 CERROR("presto_fwrite: wrote %d bytes instead of "
476                        "%d at %ld\n", rc, len, (long)*off);
477                 rc = -EIO; 
478         }
479         set_fs(old_fs);
480         EXIT;
481         return rc;
482 }
483
484 int presto_fread(struct file *file, char *str, int len, loff_t *off)
485 {
486         int rc;
487         mm_segment_t old_fs;
488         ENTRY;
489
490         if (len > 512)
491                 CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
492                        *off, len, file->f_dentry->d_inode->i_ino); 
493
494         rc = -EINVAL;
495         if ( !off ) {
496                 EXIT;
497                 return rc;
498         }
499
500         if ( ! file ) {
501                 EXIT;
502                 return rc;
503         }
504
505         if ( ! file->f_op ) {
506                 EXIT;
507                 return rc;
508         }
509
510         if ( ! file->f_op->read ) {
511                 EXIT;
512                 return rc;
513         }
514
515         old_fs = get_fs();
516         set_fs(get_ds());
517         rc = file->f_op->read(file, str, len, off);
518         if (rc != len) {
519                 CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
520                        "%d at %Ld\n", rc, len, *off);
521                 rc = -EIO; 
522         }
523         set_fs(old_fs);
524         EXIT;
525         return rc;
526 }
527
528 loff_t presto_kml_offset(struct presto_file_set *fset)
529 {
530         unsigned int kml_recno;
531         struct presto_log_fd *fd = &fset->fset_kml;
532         loff_t  offset;
533         ENTRY;
534
535         write_lock(&fd->fd_lock); 
536
537         /* Determine the largest valid offset, i.e. up until the first
538          * reservation held on the file. */
539         if ( !list_empty(&fd->fd_reservations) ) {
540                 struct presto_reservation_data *rd;
541                 rd = list_entry(fd->fd_reservations.next, 
542                                 struct presto_reservation_data, 
543                                 ri_list);
544                 offset = rd->ri_offset;
545                 kml_recno = rd->ri_recno;
546         } else {
547                 offset = fd->fd_file->f_dentry->d_inode->i_size;
548                 kml_recno = fset->fset_kml.fd_recno; 
549         }
550         write_unlock(&fd->fd_lock); 
551         return offset; 
552 }
553
554 static int presto_kml_dispatch(struct presto_file_set *fset)
555 {
556         int rc = 0;
557         unsigned int kml_recno;
558         struct presto_log_fd *fd = &fset->fset_kml;
559         loff_t offset;
560         ENTRY;
561
562         write_lock(&fd->fd_lock); 
563
564         /* Determine the largest valid offset, i.e. up until the first
565          * reservation held on the file. */
566         if ( !list_empty(&fd->fd_reservations) ) {
567                 struct presto_reservation_data *rd;
568                 rd = list_entry(fd->fd_reservations.next, 
569                                 struct presto_reservation_data, 
570                                 ri_list);
571                 offset = rd->ri_offset;
572                 kml_recno = rd->ri_recno;
573         } else {
574                 offset = fd->fd_file->f_dentry->d_inode->i_size;
575                 kml_recno = fset->fset_kml.fd_recno; 
576         }
577
578         if ( kml_recno < fset->fset_lento_recno ) {
579                 CERROR("presto_kml_dispatch: smoke is coming\n"); 
580                 write_unlock(&fd->fd_lock);
581                 EXIT;
582                 return 0; 
583         } else if ( kml_recno == fset->fset_lento_recno ) {
584                 write_unlock(&fd->fd_lock);
585                 EXIT;
586                 return 0; 
587                 /* XXX add a further "if" here to delay the KML upcall */ 
588 #if 0
589         } else if ( kml_recno < fset->fset_lento_recno + 100) {
590                 write_unlock(&fd->fd_lock);
591                 EXIT;
592                 return 0;
593 #endif
594         }
595         CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name);
596
597         rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor,
598                        fset->fset_lento_off, fset->fset_lento_recno,
599                        offset + fset->fset_kml_logical_off, kml_recno,
600                        fset->fset_name);
601
602         if ( rc ) {
603                 write_unlock(&fd->fd_lock);
604                 EXIT;
605                 return rc;
606         }
607
608         fset->fset_lento_off = offset;
609         fset->fset_lento_recno = kml_recno; 
610         write_unlock(&fd->fd_lock);
611         EXIT;
612         return 0;
613 }
614
615 int izo_lookup_file(struct presto_file_set *fset, char *path,
616                     struct nameidata *nd)
617 {
618         int error = 0;
619
620         CDEBUG(D_CACHE, "looking up: %s\n", path);
621
622         error = path_lookup(path, LOOKUP_PARENT, nd);
623         if (error) {
624                 EXIT;
625                 return error;
626         }
627
628         return 0;
629 }
630
631 /* FIXME: this function is a mess of locking and error handling.  There's got to
632  * be a better way. */
633 static int do_truncate_rename(struct presto_file_set *fset, char *oldname,
634                               char *newname)
635 {
636         struct dentry *old_dentry, *new_dentry;
637         struct nameidata oldnd, newnd;
638         char *oldpath, *newpath;
639         int error;
640
641         ENTRY;
642
643         oldpath = izo_make_path(fset, oldname);
644         if (oldpath == NULL) {
645                 EXIT;
646                 return -ENOENT;
647         }
648
649         newpath = izo_make_path(fset, newname);
650         if (newpath == NULL) {
651                 error = -ENOENT;
652                 EXIT;
653                 goto exit;
654         }
655
656         if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) {
657                 EXIT;
658                 goto exit1;
659         }
660
661         if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) {
662                 EXIT;
663                 goto exit2;
664         }
665
666         lock_rename(newnd.dentry, oldnd.dentry);
667         old_dentry = lookup_hash(&oldnd.last, oldnd.dentry);
668         error = PTR_ERR(old_dentry);
669         if (IS_ERR(old_dentry)) {
670                 EXIT;
671                 goto exit3;
672         }
673         error = -ENOENT;
674         if (!old_dentry->d_inode) {
675                 EXIT;
676                 goto exit4;
677         }
678         new_dentry = lookup_hash(&newnd.last, newnd.dentry);
679         error = PTR_ERR(new_dentry);
680         if (IS_ERR(new_dentry)) {
681                 EXIT;
682                 goto exit4;
683         }
684
685         {
686         extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry,
687                                 struct inode *new_dir,struct dentry *new_dentry);
688         error = presto_rename(old_dentry->d_parent->d_inode, old_dentry,
689                               new_dentry->d_parent->d_inode, new_dentry);
690         }
691
692         dput(new_dentry);
693         EXIT;
694  exit4:
695         dput(old_dentry);
696  exit3:
697         unlock_rename(newnd.dentry, oldnd.dentry);
698         path_release(&newnd);
699  exit2:
700         path_release(&oldnd);
701  exit1:
702         PRESTO_FREE(newpath, strlen(newpath) + 1);
703  exit:
704         PRESTO_FREE(oldpath, strlen(oldpath) + 1);
705         return error;
706 }
707
708 /* This function is called with the fset->fset_kml.fd_lock held */
709 int presto_finish_kml_truncate(struct presto_file_set *fset,
710                                unsigned long int offset)
711 {
712         struct lento_vfs_context info;
713         void *handle;
714         struct file *f;
715         struct dentry *dentry;
716         int error = 0, len;
717         struct nameidata nd;
718         char *kmlpath = NULL, *smlpath = NULL;
719         ENTRY;
720
721         if (offset == 0) {
722                 /* Lento couldn't do what it needed to; abort the truncation. */
723                 fset->fset_kml.fd_truncating = 0;
724                 EXIT;
725                 return 0;
726         }
727
728         /* someone is about to write to the end of the KML; try again later. */
729         if ( !list_empty(&fset->fset_kml.fd_reservations) ) {
730                 EXIT;
731                 return -EAGAIN;
732         }
733
734         f = presto_copy_kml_tail(fset, offset);
735         if (IS_ERR(f)) {
736                 EXIT;
737                 return PTR_ERR(f);
738         }                        
739
740         /* In a single transaction:
741          *
742          *   - unlink 'kml'
743          *   - rename 'kml_tmp' to 'kml'
744          *   - unlink 'sml'
745          *   - rename 'sml_tmp' to 'sml'
746          *   - rewrite the first record of last_rcvd with the new kml
747          *     offset.
748          */
749         handle = presto_trans_start(fset, fset->fset_dentry->d_inode,
750                                     KML_OPCODE_KML_TRUNC);
751         if (IS_ERR(handle)) {
752                 presto_release_space(fset->fset_cache, PRESTO_REQLOW);
753                 CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n");
754                 EXIT;
755                 return -ENOMEM;
756         }
757
758         memset(&info, 0, sizeof(info));
759         info.flags = LENTO_FL_IGNORE_TIME;
760
761         kmlpath = izo_make_path(fset, "kml");
762         if (kmlpath == NULL) {
763                 error = -ENOMEM;
764                 CERROR("make_path failed: ENOMEM\n");
765                 EXIT;
766                 goto exit_commit;
767         }
768
769         if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) {
770                 CERROR("izo_lookup_file(kml) failed: %d.\n", error);
771                 EXIT;
772                 goto exit_commit;
773         }
774         down(&nd.dentry->d_inode->i_sem);
775         dentry = lookup_hash(&nd.last, nd.dentry);
776         error = PTR_ERR(dentry);
777         if (IS_ERR(dentry)) {
778                 up(&nd.dentry->d_inode->i_sem);
779                 path_release(&nd);
780                 CERROR("lookup_hash failed\n");
781                 EXIT;
782                 goto exit_commit;
783         }
784         error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
785         dput(dentry);
786         up(&nd.dentry->d_inode->i_sem);
787         path_release(&nd);
788
789         if (error != 0) {
790                 CERROR("presto_do_unlink(kml) failed: %d.\n", error);
791                 EXIT;
792                 goto exit_commit;
793         }
794
795         smlpath = izo_make_path(fset, "sml");
796         if (smlpath == NULL) {
797                 error = -ENOMEM;
798                 CERROR("make_path() failed: ENOMEM\n");
799                 EXIT;
800                 goto exit_commit;
801         }
802
803         if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) {
804                 CERROR("izo_lookup_file(sml) failed: %d.\n", error);
805                 EXIT;
806                 goto exit_commit;
807         }
808         down(&nd.dentry->d_inode->i_sem);
809         dentry = lookup_hash(&nd.last, nd.dentry);
810         error = PTR_ERR(dentry);
811         if (IS_ERR(dentry)) {
812                 up(&nd.dentry->d_inode->i_sem);
813                 path_release(&nd);
814                 CERROR("lookup_hash failed\n");
815                 EXIT;
816                 goto exit_commit;
817         }
818         error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
819         dput(dentry);
820         up(&nd.dentry->d_inode->i_sem);
821         path_release(&nd);
822
823         if (error != 0) {
824                 CERROR("presto_do_unlink(sml) failed: %d.\n", error);
825                 EXIT;
826                 goto exit_commit;
827         }
828
829         error = do_truncate_rename(fset, "kml_tmp", "kml");
830         if (error != 0)
831                 CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error);
832         error = do_truncate_rename(fset, "sml_tmp", "sml");
833         if (error != 0)
834                 CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error);
835
836         /* Write a new 'last_rcvd' record with the new KML offset */
837         fset->fset_kml_logical_off += offset;
838         CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n",
839                fset->fset_kml_logical_off);
840         if (presto_write_kml_logical_offset(fset) != 0) {
841                 CERROR("presto_write_kml_logical_offset failed\n");
842         }
843
844         presto_trans_commit(fset, handle);
845
846         /* Everything was successful, so swap the KML file descriptors */
847         filp_close(fset->fset_kml.fd_file, NULL);
848         fset->fset_kml.fd_file = f;
849         fset->fset_kml.fd_offset -= offset;
850         fset->fset_kml.fd_truncating = 0;
851
852         EXIT;
853         return 0;
854
855  exit_commit:
856         presto_trans_commit(fset, handle);
857         len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml");
858         if (kmlpath != NULL)
859                 PRESTO_FREE(kmlpath, len);
860         if (smlpath != NULL)
861                 PRESTO_FREE(smlpath, len);
862         return error;
863 }
864
865 /* structure of an extended log record:
866
867    buf-prefix  buf-body [string1 [string2 [string3]]] buf-suffix
868
869    note: moves offset forward
870 */
871 static inline int presto_write_record(struct file *f, loff_t *off,
872                         const char *buf, size_t size,
873                         const char *string1, int len1, 
874                         const char *string2, int len2,
875                         const char *string3, int len3)
876 {
877         size_t prefix_size; 
878         int rc;
879
880         prefix_size = size - sizeof(struct kml_suffix);
881         rc = presto_fwrite(f, buf, prefix_size, off);
882         if ( rc != prefix_size ) {
883                 CERROR("Write error!\n");
884                 EXIT;
885                 return -EIO;
886         }
887
888         if  ( string1  && len1 ) {
889                 rc = presto_fwrite(f, string1, len1, off);
890                 if ( rc != len1 ) {
891                         CERROR("Write error!\n");
892                         EXIT;
893                         return -EIO;
894                 }
895         }
896
897         if  ( string2 && len2 ) {
898                 rc = presto_fwrite(f, string2, len2, off);
899                 if ( rc != len2 ) {
900                         CERROR("Write error!\n");
901                         EXIT;
902                         return -EIO;
903                 }
904         }
905
906         if  ( string3 && len3 ) {
907                 rc = presto_fwrite(f, string3, len3, off);
908                 if ( rc != len3 ) {
909                         CERROR("Write error!\n");
910                         EXIT;
911                         return -EIO;
912                 }
913         }
914
915         rc = presto_fwrite(f, buf + prefix_size,
916                            sizeof(struct kml_suffix), off);
917         if ( rc != sizeof(struct kml_suffix) ) {
918                 CERROR("Write error!\n");
919                 EXIT;
920                 return -EIO;
921         }
922         return 0;
923 }
924
925
926 /*
927  * rec->size must be valid prior to calling this function.
928  *
929  * had to export this for branch_reinter in kml_reint.c 
930  */
931 int presto_log(struct presto_file_set *fset, struct rec_info *rec,
932                const char *buf, size_t size,
933                const char *string1, int len1, 
934                const char *string2, int len2,
935                const char *string3, int len3)
936 {
937         int rc;
938         struct presto_reservation_data rd;
939         loff_t offset;
940         struct presto_log_fd *fd;
941         struct kml_suffix *s;
942         int prefix_size; 
943
944         ENTRY;
945
946         /* buf is NULL when no_journal is in effect */
947         if (!buf) {
948                 EXIT;
949                 return -EINVAL;
950         }
951
952         if (rec->is_kml) {
953                 fd = &fset->fset_kml;
954         } else {
955                 fd = &fset->fset_lml;
956         }
957
958         presto_reserve_record(fset, fd, rec, &rd);
959
960         if (rec->is_kml) {
961                 if (rec->offset < fset->fset_kml_logical_off) {
962                         CERROR("record with pre-trunc offset.  tell phil.\n");
963                         BUG();
964                 }
965                 offset = rec->offset - fset->fset_kml_logical_off;
966         } else {
967                 offset = rec->offset;
968         }
969
970         /* now we know the record number */ 
971         prefix_size = size - sizeof(struct kml_suffix);
972         s = (struct kml_suffix *) (buf + prefix_size); 
973         s->recno = cpu_to_le32(rec->recno); 
974
975         rc = presto_write_record(fd->fd_file, &offset, buf, size, 
976                                  string1, len1, string2, len2, string3, len3); 
977         if (rc) {
978                 CERROR("presto: error writing record to %s\n",
979                         rec->is_kml ? "KML" : "LML"); 
980                 return rc;
981         }
982         presto_release_record(fd, &rd);
983
984         rc = presto_kml_dispatch(fset);
985
986         EXIT;
987         return rc;
988 }
989
990 /* read from the record at tail */
991 static int presto_last_record(struct presto_log_fd *fd, loff_t *size, 
992                              loff_t *tail_offset, __u32 *recno, loff_t tail)
993 {
994         struct kml_suffix suffix;
995         int rc;
996         loff_t zeroes;
997
998         *recno = 0;
999         *tail_offset = 0;
1000         *size = 0;
1001         
1002         if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) {
1003                 EXIT;
1004                 return 0;
1005         }
1006
1007         zeroes = tail - sizeof(int);
1008         while ( zeroes >= 0 ) {
1009                 int data;
1010                 rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data), 
1011                                   &zeroes);
1012                 if ( rc != sizeof(data) ) { 
1013                         rc = -EIO;
1014                         return rc;
1015                 }
1016                 if (data)
1017                         break;
1018                 zeroes -= 2 * sizeof(data);
1019         }
1020
1021         /* zeroes at the begining of file. this is needed to prevent
1022            presto_fread errors  -SHP
1023         */
1024         if (zeroes <= 0) return 0;
1025                        
1026         zeroes -= sizeof(suffix) + sizeof(int);
1027         rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes);
1028         if ( rc != sizeof(suffix) ) {
1029                 EXIT;
1030                 return rc;
1031         }
1032         if ( suffix.len > 500 ) {
1033                 CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n", 
1034                         (long) zeroes, (long)*tail_offset, suffix.len); 
1035         }
1036
1037         *recno = suffix.recno;
1038         *size = suffix.len;
1039         *tail_offset = zeroes;
1040         return 0;
1041 }
1042
1043 static int izo_kml_last_recno(struct presto_log_fd *logfd)
1044 {
1045         int rc; 
1046         loff_t size;
1047         loff_t tail_offset;
1048         int recno;
1049         loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size;
1050
1051         rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail);
1052         if (rc != 0) {
1053                 EXIT;
1054                 return rc;
1055         }
1056
1057         logfd->fd_offset = tail_offset;
1058         logfd->fd_recno = recno;
1059         CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset  %Ld\n",
1060                recno, tail_offset); 
1061         EXIT;
1062         return 0;
1063 }
1064
1065 struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags)
1066 {
1067         struct presto_cache *cache = fset->fset_cache;
1068         struct file *f;
1069         int error;
1070         ENTRY;
1071
1072         f = izo_fset_open(fset, name, flags, 0644);
1073         error = PTR_ERR(f);
1074         if (IS_ERR(f)) {
1075                 EXIT;
1076                 return f;
1077         }
1078
1079         error = -EINVAL;
1080         if ( cache != presto_get_cache(f->f_dentry->d_inode) ) {
1081                 CERROR("InterMezzo: %s cache does not match fset cache!\n",name);
1082                 fset->fset_kml.fd_file = NULL;
1083                 filp_close(f, NULL);
1084                 f = NULL;
1085                 EXIT;
1086                 return f;
1087         }
1088
1089         if (cache->cache_filter &&  cache->cache_filter->o_trops &&
1090             cache->cache_filter->o_trops->tr_journal_data) {
1091                 cache->cache_filter->o_trops->tr_journal_data
1092                         (f->f_dentry->d_inode);
1093         } else {
1094                 CERROR("InterMezzo WARNING: no file data logging!\n"); 
1095         }
1096
1097         EXIT;
1098
1099         return f;
1100 }
1101
1102 int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1103 {
1104         int error = 0;
1105         struct file *f;
1106
1107         ENTRY;
1108         if (logfd->fd_file) {
1109                 CDEBUG(D_INODE, "fset already has KML open\n");
1110                 EXIT;
1111                 return 0;
1112         }
1113
1114         logfd->fd_lock = RW_LOCK_UNLOCKED;
1115         INIT_LIST_HEAD(&logfd->fd_reservations); 
1116         f = izo_log_open(fset, "kml",  O_RDWR | O_CREAT);
1117         if (IS_ERR(f)) {
1118                 error = PTR_ERR(f);
1119                 return error;
1120         }
1121
1122         logfd->fd_file = f;
1123         error = izo_kml_last_recno(logfd);
1124
1125         if (error) {
1126                 logfd->fd_file = NULL;
1127                 filp_close(f, NULL);
1128                 CERROR("InterMezzo: IO error in KML of fset %s\n",
1129                        fset->fset_name);
1130                 EXIT;
1131                 return error;
1132         }
1133         fset->fset_lento_off = logfd->fd_offset;
1134         fset->fset_lento_recno = logfd->fd_recno;
1135
1136         EXIT;
1137         return error;
1138 }
1139
1140 int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1141 {
1142         int error = 0;
1143         struct file *f;
1144         struct rec_info recinfo;
1145
1146         ENTRY;
1147         if (logfd->fd_file != NULL) {
1148                 CDEBUG(D_INODE, "fset already has last_rcvd open\n");
1149                 EXIT;
1150                 return 0;
1151         }
1152
1153         logfd->fd_lock = RW_LOCK_UNLOCKED;
1154         INIT_LIST_HEAD(&logfd->fd_reservations); 
1155         f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT);
1156         if (IS_ERR(f)) {
1157                 error = PTR_ERR(f);
1158                 return error;
1159         }
1160
1161         logfd->fd_file = f;
1162         logfd->fd_offset = f->f_dentry->d_inode->i_size;
1163
1164         error = izo_rep_cache_init(fset);
1165
1166         if (presto_read_kml_logical_offset(&recinfo, fset) == 0) {
1167                 fset->fset_kml_logical_off = recinfo.offset;
1168         } else {
1169                 /* The 'last_rcvd' file doesn't contain a kml offset record,
1170                  * probably because we just created 'last_rcvd'.  Write one. */
1171                 fset->fset_kml_logical_off = 0;
1172                 presto_write_kml_logical_offset(fset);
1173         }
1174
1175         EXIT;
1176         return error;
1177 }
1178
1179 int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1180 {
1181         int error = 0;
1182         struct file *f;
1183
1184         ENTRY;
1185         if (logfd->fd_file) {
1186                 CDEBUG(D_INODE, "fset already has lml open\n");
1187                 EXIT;
1188                 return 0;
1189         }
1190
1191         logfd->fd_lock = RW_LOCK_UNLOCKED;
1192         INIT_LIST_HEAD(&logfd->fd_reservations); 
1193         f = izo_log_open(fset, "lml", O_RDWR | O_CREAT);
1194         if (IS_ERR(f)) {
1195                 error = PTR_ERR(f);
1196                 return error;
1197         }
1198
1199         logfd->fd_file = f;
1200         logfd->fd_offset = f->f_dentry->d_inode->i_size;
1201
1202         EXIT;
1203         return error;
1204 }
1205
1206 /* Get the KML-offset record from the last_rcvd file */
1207 int presto_read_kml_logical_offset(struct rec_info *recinfo,
1208                                    struct presto_file_set *fset)
1209 {
1210         loff_t off;
1211         struct izo_rcvd_rec rec;
1212         char uuid[16] = {0};
1213
1214         off = izo_rcvd_get(&rec, fset, uuid);
1215         if (off < 0)
1216                 return -1;
1217
1218         recinfo->offset = rec.lr_local_offset;
1219         return 0;
1220 }
1221
1222 int presto_write_kml_logical_offset(struct presto_file_set *fset)
1223 {
1224         loff_t rc;
1225         struct izo_rcvd_rec rec;
1226         char uuid[16] = {0};
1227
1228         rc = izo_rcvd_get(&rec, fset, uuid);
1229         if (rc < 0)
1230                 memset(&rec, 0, sizeof(rec));
1231
1232         rec.lr_local_offset =
1233                 cpu_to_le64(fset->fset_kml_logical_off);
1234
1235         return izo_rcvd_write(fset, &rec);
1236 }
1237
1238 struct file * presto_copy_kml_tail(struct presto_file_set *fset,
1239                                    unsigned long int start)
1240 {
1241         struct file *f;
1242         int len;
1243         loff_t read_off, write_off, bytes;
1244
1245         ENTRY;
1246
1247         /* Copy the tail of 'kml' to 'kml_tmp' */
1248         f = izo_log_open(fset, "kml_tmp", O_RDWR);
1249         if (IS_ERR(f)) {
1250                 EXIT;
1251                 return f;
1252         }
1253
1254         write_off = 0;
1255         read_off = start;
1256         bytes = fset->fset_kml.fd_offset - start;
1257         while (bytes > 0) {
1258                 char buf[4096];
1259                 int toread;
1260
1261                 if (bytes > sizeof(buf))
1262                         toread = sizeof(buf);
1263                 else
1264                         toread = bytes;
1265
1266                 len = presto_fread(fset->fset_kml.fd_file, buf, toread,
1267                                    &read_off);
1268                 if (len <= 0)
1269                         break;
1270
1271                 if (presto_fwrite(f, buf, len, &write_off) != len) {
1272                         filp_close(f, NULL);
1273                         EXIT;
1274                         return ERR_PTR(-EIO);
1275                 }
1276
1277                 bytes -= len;
1278         }
1279
1280         EXIT;
1281         return f;
1282 }
1283
1284
1285 /* LML records here */
1286 /* this writes an LML record to the LML file (rec->is_kml =0)  */
1287 int presto_write_lml_close(struct rec_info *rec,
1288                            struct presto_file_set *fset, 
1289                            struct file *file,
1290                            __u64 remote_ino,
1291                            __u64 remote_generation,
1292                            struct presto_version *remote_version,
1293                            struct presto_version *new_file_ver)
1294 {
1295         int opcode = KML_OPCODE_CLOSE;
1296         char *buffer;
1297         struct dentry *dentry = file->f_dentry; 
1298         __u64 ino;
1299         __u32 pathlen;
1300         char *path;
1301         __u32 generation;
1302         int size;
1303         char *logrecord;
1304         char record[292];
1305         struct dentry *root;
1306         int error;
1307
1308         ENTRY;
1309
1310         if ( presto_no_journal(fset) ) {
1311           EXIT;
1312           return 0;
1313         }
1314         root = fset->fset_dentry;
1315
1316         BUFF_ALLOC(buffer, NULL);
1317         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1318         CDEBUG(D_INODE, "Path: %s\n", path);
1319         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1320         ino = cpu_to_le64(dentry->d_inode->i_ino);
1321         generation = cpu_to_le32(dentry->d_inode->i_generation);
1322         size =  sizeof(__u32) * current->group_info->ngroups + 
1323                 sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
1324                 sizeof(ino) + sizeof(generation) + sizeof(pathlen) +
1325                 sizeof(remote_ino) + sizeof(remote_generation) + 
1326                 sizeof(remote_version) + sizeof(rec->offset) +
1327                 sizeof(struct kml_suffix);
1328
1329         if ( size > sizeof(record) )
1330                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1331         
1332         rec->is_kml = 0;
1333         rec->size = size + size_round(le32_to_cpu(pathlen));
1334
1335         logrecord = journal_log_prefix(record, opcode, rec);
1336         logrecord = log_version(logrecord, new_file_ver);
1337         logrecord = logit(logrecord, &ino, sizeof(ino));
1338         logrecord = logit(logrecord, &generation, sizeof(generation));
1339         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1340         logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino));
1341         logrecord = logit(logrecord, &remote_generation,
1342                           sizeof(remote_generation));
1343         logrecord = log_version(logrecord, remote_version);
1344         logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset));
1345         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1346
1347         error = presto_log(fset, rec, record, size,
1348                            path, size_round(le32_to_cpu(pathlen)),
1349                            NULL, 0, NULL, 0);
1350
1351         BUFF_FREE(buffer);
1352
1353         EXIT;
1354         return error;
1355 }
1356
1357 /* 
1358  * Check if the given record is at the end of the file. If it is, truncate
1359  * the lml to the record's offset, removing it. Repeat on prior record,
1360  * until we reach an active record or a reserved record (as defined by the
1361  * reservations list).
1362  */
1363 static int presto_truncate_lml_tail(struct presto_file_set *fset)
1364 {
1365         loff_t lml_tail;
1366         loff_t lml_last_rec;
1367         loff_t lml_last_recsize;
1368         loff_t local_offset;
1369         int recno;
1370         struct kml_prefix_hdr prefix;
1371         struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode;
1372         void *handle;
1373         int rc;
1374
1375         ENTRY;
1376         /* If someone else is already truncating the LML, return. */
1377         write_lock(&fset->fset_lml.fd_lock); 
1378         if (fset->fset_lml.fd_truncating == 1 ) {
1379                 write_unlock(&fset->fset_lml.fd_lock); 
1380                 EXIT;
1381                 return 0;
1382         }
1383         /* someone is about to write to the end of the LML */ 
1384         if ( !list_empty(&fset->fset_lml.fd_reservations) ) {
1385                 write_unlock(&fset->fset_lml.fd_lock); 
1386                 EXIT;
1387                 return 0;
1388         }
1389        lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size;
1390        /* Nothing to truncate?*/
1391        if (lml_tail == 0) {
1392                 write_unlock(&fset->fset_lml.fd_lock); 
1393                 EXIT;
1394                 return 0;
1395        }
1396        fset->fset_lml.fd_truncating = 1;
1397        write_unlock(&fset->fset_lml.fd_lock); 
1398
1399        presto_last_record(&fset->fset_lml, &lml_last_recsize,
1400                           &lml_last_rec, &recno, lml_tail);
1401        /* Do we have a record to check? If not we have zeroes at the
1402           beginning of the file. -SHP
1403        */
1404        if (lml_last_recsize != 0) {
1405                 local_offset = lml_last_rec - lml_last_recsize;
1406                 rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix,  
1407                                         sizeof(prefix), &local_offset); 
1408                 if (rc != sizeof(prefix)) {
1409                         EXIT;
1410                         goto tr_out;
1411                 }
1412        
1413                 if ( prefix.opcode != KML_OPCODE_NOOP ) {
1414                         EXIT;
1415                         rc = 0;
1416                         /* We may have zeroes at the end of the file, should
1417                            we clear them out? -SHP
1418                         */
1419                         goto tr_out;
1420                 }
1421         } else 
1422                 lml_last_rec=0;
1423
1424         handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC);
1425         if ( IS_ERR(handle) ) {
1426                 EXIT;
1427                 rc = -ENOMEM;
1428                 goto tr_out;
1429         }
1430
1431         rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry, 
1432                                 lml_last_rec - lml_last_recsize, lml_tail);
1433         presto_trans_commit(fset, handle); 
1434         if ( rc == 0 ) {
1435                 rc = 1;
1436         }
1437         EXIT;
1438
1439  tr_out:
1440         CDEBUG(D_JOURNAL, "rc = %d\n", rc);
1441         write_lock(&fset->fset_lml.fd_lock);
1442         fset->fset_lml.fd_truncating = 0;
1443         write_unlock(&fset->fset_lml.fd_lock);
1444         return rc;
1445 }
1446
1447 int presto_truncate_lml(struct presto_file_set *fset)
1448 {
1449         int rc; 
1450         ENTRY;
1451         
1452         while ( (rc = presto_truncate_lml_tail(fset)) > 0);
1453         if ( rc < 0 && rc != -EALREADY) {
1454                 CERROR("truncate_lml error %d\n", rc); 
1455         }
1456         EXIT;
1457         return rc;
1458 }
1459
1460 int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
1461 {
1462         int rc;
1463         struct kml_prefix_hdr record;
1464         loff_t offset = lml_offset;
1465
1466         ENTRY;
1467
1468         if ( presto_no_journal(fset) ) {
1469                 EXIT;
1470                 return 0;
1471         }
1472
1473         CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %Zd\n", 
1474                (long)lml_offset, sizeof(record));
1475         rc = presto_fread(fset->fset_lml.fd_file, (char *)&record,
1476                           sizeof(record), &offset);
1477
1478         if ( rc != sizeof(record) ) {
1479                 CERROR("presto: clear_lml io error %d\n", rc); 
1480                 EXIT;
1481                 return -EIO;
1482         }
1483
1484         /* overwrite the prefix */ 
1485         CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
1486         record.opcode = KML_OPCODE_NOOP;
1487         offset = lml_offset;
1488         /* note: this does just a single transaction in the cache */
1489         rc = presto_fwrite(fset->fset_lml.fd_file, (char *)(&record), 
1490                               sizeof(record), &offset);
1491         if ( rc != sizeof(record) ) {
1492                 EXIT;
1493                 return -EIO;
1494         }
1495
1496         EXIT;
1497         return 0; 
1498 }
1499
1500
1501
1502 /* now a journal function for every operation */
1503
1504 int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
1505                            struct dentry *dentry, struct presto_version *old_ver,
1506                            struct izo_rollback_data *rb, struct iattr *iattr)
1507 {
1508         int opcode = KML_OPCODE_SETATTR;
1509         char *buffer, *path, *logrecord, record[316];
1510         struct dentry *root;
1511         __u32 uid, gid, mode, valid, flags, pathlen;
1512         __u64 fsize, mtime, ctime;
1513         int error, size;
1514
1515         ENTRY;
1516         if ( presto_no_journal(fset) ) {
1517                 EXIT;
1518                 return 0;
1519         }
1520
1521         if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
1522             || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
1523                 EXIT;
1524                 return 0;
1525         }
1526
1527         root = fset->fset_dentry;
1528
1529         BUFF_ALLOC(buffer, NULL);
1530         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1531         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1532         size =  sizeof(__u32) * current->group_info->ngroups + 
1533                 sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) +
1534                 sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) +
1535                 sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) +
1536                 sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix);
1537
1538         if ( size > sizeof(record) )
1539                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1540
1541         /* Only journal one kind of mtime, and not atime at all.  Also don't
1542          * journal bogus data in iattr, to make the journal more compressible.
1543          */
1544         if (iattr->ia_valid & ATTR_MTIME_SET)
1545                 iattr->ia_valid = iattr->ia_valid | ATTR_MTIME;
1546         valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET |
1547                                                 ATTR_ATIME_SET));
1548         mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0;
1549         uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0;
1550         gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0;
1551         fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0;
1552         mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime.tv_sec): 0;
1553         ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime.tv_sec): 0;
1554         flags = iattr->ia_valid & ATTR_ATTR_FLAG ?
1555                 cpu_to_le32(iattr->ia_attr_flags): 0;
1556
1557         rec->is_kml = 1;
1558         rec->size = size + size_round(le32_to_cpu(pathlen));
1559
1560         logrecord = journal_log_prefix(record, opcode, rec);
1561         logrecord = log_version(logrecord, old_ver);
1562         logrecord = logit(logrecord, &valid, sizeof(valid));
1563         logrecord = logit(logrecord, &mode, sizeof(mode));
1564         logrecord = logit(logrecord, &uid, sizeof(uid));
1565         logrecord = logit(logrecord, &gid, sizeof(gid));
1566         logrecord = logit(logrecord, &fsize, sizeof(fsize));
1567         logrecord = logit(logrecord, &mtime, sizeof(mtime));
1568         logrecord = logit(logrecord, &ctime, sizeof(ctime));
1569         logrecord = logit(logrecord, &flags, sizeof(flags));
1570         logrecord = log_rollback(logrecord, rb);
1571         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1572         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1573
1574         error = presto_log(fset, rec, record, size,
1575                            path, size_round(le32_to_cpu(pathlen)),
1576                            NULL, 0, NULL, 0);
1577
1578         BUFF_FREE(buffer);
1579         EXIT;
1580         return error;
1581 }
1582
1583 int presto_get_fileid(int minor, struct presto_file_set *fset,
1584                       struct dentry *dentry)
1585 {
1586         int opcode = KML_OPCODE_GET_FILEID;
1587         struct rec_info rec;
1588         char *buffer, *path, *logrecord, record[4096]; /*include path*/
1589         struct dentry *root;
1590         __u32 uid, gid, pathlen;
1591         int error, size;
1592         struct kml_suffix *suffix;
1593
1594         ENTRY;
1595
1596         root = fset->fset_dentry;
1597
1598         uid = cpu_to_le32(dentry->d_inode->i_uid);
1599         gid = cpu_to_le32(dentry->d_inode->i_gid);
1600         BUFF_ALLOC(buffer, NULL);
1601         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1602         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1603         size =  sizeof(__u32) * current->group_info->ngroups + 
1604                 sizeof(struct kml_prefix_hdr) + sizeof(pathlen) +
1605                 size_round(le32_to_cpu(pathlen)) +
1606                 sizeof(struct kml_suffix);
1607
1608         CDEBUG(D_FILE, "kml size: %d\n", size);
1609         if ( size > sizeof(record) )
1610                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1611
1612         memset(&rec, 0, sizeof(rec));
1613         rec.is_kml = 1;
1614         rec.size = size;
1615
1616         logrecord = journal_log_prefix(record, opcode, &rec);
1617         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1618         logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen)));
1619         suffix = (struct kml_suffix *)logrecord;
1620         logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec);
1621         /* journal_log_suffix expects journal_log to set this */
1622         suffix->recno = 0;
1623
1624         CDEBUG(D_FILE, "actual kml size: %Zd\n", logrecord - record);
1625         CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path);
1626
1627         error = izo_upc_get_fileid(minor, size, record, 
1628                                    size_round(le32_to_cpu(pathlen)), path,
1629                                    fset->fset_name);
1630
1631         BUFF_FREE(buffer);
1632         EXIT;
1633         return error;
1634 }
1635
1636 int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
1637                           struct dentry *dentry,
1638                           struct presto_version *tgt_dir_ver,
1639                           struct presto_version *new_file_ver, int mode)
1640 {
1641         int opcode = KML_OPCODE_CREATE;
1642         char *buffer, *path, *logrecord, record[292];
1643         struct dentry *root;
1644         __u32 uid, gid, lmode, pathlen;
1645         int error, size;
1646
1647         ENTRY;
1648         if ( presto_no_journal(fset) ) {
1649                 EXIT;
1650                 return 0;
1651         }
1652
1653         root = fset->fset_dentry;
1654
1655         uid = cpu_to_le32(dentry->d_inode->i_uid);
1656         gid = cpu_to_le32(dentry->d_inode->i_gid);
1657         lmode = cpu_to_le32(mode);
1658  
1659         BUFF_ALLOC(buffer, NULL);
1660         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1661         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1662         size =  sizeof(__u32) * current->group_info->ngroups + 
1663                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1664                 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1665                 sizeof(struct kml_suffix);
1666
1667         if ( size > sizeof(record) )
1668                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1669
1670         rec->is_kml = 1;
1671         rec->size = size + size_round(le32_to_cpu(pathlen));
1672
1673         logrecord = journal_log_prefix(record, opcode, rec);
1674         logrecord = log_version(logrecord, tgt_dir_ver);
1675         logrecord = log_dentry_version(logrecord, dentry->d_parent);
1676         logrecord = log_version(logrecord, new_file_ver);
1677         logrecord = logit(logrecord, &lmode, sizeof(lmode));
1678         logrecord = logit(logrecord, &uid, sizeof(uid));
1679         logrecord = logit(logrecord, &gid, sizeof(gid));
1680         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1681         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1682
1683         error = presto_log(fset, rec, record, size,
1684                            path, size_round(le32_to_cpu(pathlen)),
1685                            NULL, 0, NULL, 0);
1686
1687         BUFF_FREE(buffer);
1688         EXIT;
1689         return error;
1690 }
1691
1692 int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
1693                            struct dentry *dentry, const char *target,
1694                            struct presto_version *tgt_dir_ver,
1695                            struct presto_version *new_link_ver)
1696 {
1697         int opcode = KML_OPCODE_SYMLINK;
1698         char *buffer, *path, *logrecord, record[292];
1699         struct dentry *root;
1700         __u32 uid, gid, pathlen;
1701         __u32 targetlen = cpu_to_le32(strlen(target));
1702         int error, size;
1703
1704         ENTRY;
1705         if ( presto_no_journal(fset) ) {
1706                 EXIT;
1707                 return 0;
1708         }
1709
1710         root = fset->fset_dentry;
1711
1712         uid = cpu_to_le32(dentry->d_inode->i_uid);
1713         gid = cpu_to_le32(dentry->d_inode->i_gid);
1714
1715         BUFF_ALLOC(buffer, NULL);
1716         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1717         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1718         size =  sizeof(__u32) * current->group_info->ngroups + 
1719                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1720                 sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1721                 sizeof(targetlen) + sizeof(struct kml_suffix);
1722
1723         if ( size > sizeof(record) )
1724                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1725
1726         rec->is_kml = 1;
1727         rec->size = size + size_round(le32_to_cpu(pathlen)) +
1728                 size_round(le32_to_cpu(targetlen));
1729
1730         logrecord = journal_log_prefix(record, opcode, rec);
1731         logrecord = log_version(logrecord, tgt_dir_ver);
1732         logrecord = log_dentry_version(logrecord, dentry->d_parent);
1733         logrecord = log_version(logrecord, new_link_ver);
1734         logrecord = logit(logrecord, &uid, sizeof(uid));
1735         logrecord = logit(logrecord, &gid, sizeof(gid));
1736         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1737         logrecord = logit(logrecord, &targetlen, sizeof(targetlen));
1738         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1739
1740         error = presto_log(fset, rec, record, size,
1741                            path, size_round(le32_to_cpu(pathlen)),
1742                            target, size_round(le32_to_cpu(targetlen)),
1743                            NULL, 0);
1744
1745         BUFF_FREE(buffer);
1746         EXIT;
1747         return error;
1748 }
1749
1750 int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
1751                          struct dentry *dentry,
1752                          struct presto_version *tgt_dir_ver,
1753                          struct presto_version *new_dir_ver, int mode)
1754 {
1755         int opcode = KML_OPCODE_MKDIR;
1756         char *buffer, *path, *logrecord, record[292];
1757         struct dentry *root;
1758         __u32 uid, gid, lmode, pathlen;
1759         int error, size;
1760
1761         ENTRY;
1762         if ( presto_no_journal(fset) ) {
1763                 EXIT;
1764                 return 0;
1765         }
1766
1767         root = fset->fset_dentry;
1768
1769         uid = cpu_to_le32(dentry->d_inode->i_uid);
1770         gid = cpu_to_le32(dentry->d_inode->i_gid);
1771         lmode = cpu_to_le32(mode);
1772
1773         BUFF_ALLOC(buffer, NULL);
1774         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1775         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1776         size = sizeof(__u32) * current->group_info->ngroups + 
1777                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1778                 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1779                 sizeof(struct kml_suffix);
1780
1781         if ( size > sizeof(record) )
1782                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1783
1784         rec->is_kml = 1;
1785         rec->size = size + size_round(le32_to_cpu(pathlen));
1786         logrecord = journal_log_prefix(record, opcode, rec);
1787
1788         logrecord = log_version(logrecord, tgt_dir_ver);
1789         logrecord = log_dentry_version(logrecord, dentry->d_parent);
1790         logrecord = log_version(logrecord, new_dir_ver);
1791         logrecord = logit(logrecord, &lmode, sizeof(lmode));
1792         logrecord = logit(logrecord, &uid, sizeof(uid));
1793         logrecord = logit(logrecord, &gid, sizeof(gid));
1794         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1795         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1796
1797         error = presto_log(fset, rec, record, size,
1798                            path, size_round(le32_to_cpu(pathlen)),
1799                            NULL, 0, NULL, 0);
1800
1801         BUFF_FREE(buffer);
1802         EXIT;
1803         return error;
1804 }
1805
1806
1807 int
1808 presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
1809                      struct dentry *dir, struct presto_version *tgt_dir_ver,
1810                      struct presto_version *old_dir_ver,
1811                      struct izo_rollback_data *rb, int len, const char *name)
1812 {
1813         int opcode = KML_OPCODE_RMDIR;
1814         char *buffer, *path, *logrecord, record[316];
1815         __u32 pathlen, llen;
1816         struct dentry *root;
1817         int error, size;
1818
1819         ENTRY;
1820         if ( presto_no_journal(fset) ) {
1821                 EXIT;
1822                 return 0;
1823         }
1824
1825         root = fset->fset_dentry;
1826
1827         llen = cpu_to_le32(len);
1828         BUFF_ALLOC(buffer, NULL);
1829         path = presto_path(dir, root, buffer, PAGE_SIZE);
1830         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1831         size =  sizeof(__u32) * current->group_info->ngroups + 
1832                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1833                 sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
1834                 sizeof(struct kml_suffix);
1835
1836         if ( size > sizeof(record) )
1837                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1838
1839         CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n",
1840                path, pathlen, name, len, size);
1841
1842         rec->is_kml = 1;
1843         rec->size = size + size_round(le32_to_cpu(pathlen)) + 
1844                 size_round(len);
1845
1846         logrecord = journal_log_prefix(record, opcode, rec);
1847         logrecord = log_version(logrecord, tgt_dir_ver);
1848         logrecord = log_dentry_version(logrecord, dir);
1849         logrecord = log_version(logrecord, old_dir_ver);
1850         logrecord = logit(logrecord, rb, sizeof(*rb));
1851         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1852         logrecord = logit(logrecord, &llen, sizeof(llen));
1853         logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
1854         error = presto_log(fset, rec, record, size,
1855                            path, size_round(le32_to_cpu(pathlen)),
1856                            name, size_round(len),
1857                            NULL, 0);
1858
1859         BUFF_FREE(buffer);
1860         EXIT;
1861         return error;
1862 }
1863
1864
1865 int
1866 presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
1867                      struct dentry *dentry, struct presto_version *tgt_dir_ver,
1868                      struct presto_version *new_node_ver, int mode,
1869                      int dmajor, int dminor )
1870 {
1871         int opcode = KML_OPCODE_MKNOD;
1872         char *buffer, *path, *logrecord, record[292];
1873         struct dentry *root;
1874         __u32 uid, gid, lmode, lmajor, lminor, pathlen;
1875         int error, size;
1876
1877         ENTRY;
1878         if ( presto_no_journal(fset) ) {
1879                 EXIT;
1880                 return 0;
1881         }
1882
1883         root = fset->fset_dentry;
1884
1885         uid = cpu_to_le32(dentry->d_inode->i_uid);
1886         gid = cpu_to_le32(dentry->d_inode->i_gid);
1887         lmode = cpu_to_le32(mode);
1888         lmajor = cpu_to_le32(dmajor);
1889         lminor = cpu_to_le32(dminor);
1890
1891         BUFF_ALLOC(buffer, NULL);
1892         path = presto_path(dentry, root, buffer, PAGE_SIZE);
1893         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1894         size = sizeof(__u32) * current->group_info->ngroups + 
1895                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1896                 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) +
1897                 sizeof(lminor) + sizeof(pathlen) +
1898                 sizeof(struct kml_suffix);
1899
1900         if ( size > sizeof(record) )
1901                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1902
1903         rec->is_kml = 1;
1904         rec->size = size + size_round(le32_to_cpu(pathlen));
1905
1906         logrecord = journal_log_prefix(record, opcode, rec);
1907         logrecord = log_version(logrecord, tgt_dir_ver);
1908         logrecord = log_dentry_version(logrecord, dentry->d_parent);
1909         logrecord = log_version(logrecord, new_node_ver);
1910         logrecord = logit(logrecord, &lmode, sizeof(lmode));
1911         logrecord = logit(logrecord, &uid, sizeof(uid));
1912         logrecord = logit(logrecord, &gid, sizeof(gid));
1913         logrecord = logit(logrecord, &lmajor, sizeof(lmajor));
1914         logrecord = logit(logrecord, &lminor, sizeof(lminor));
1915         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1916         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1917
1918         error = presto_log(fset, rec, record, size,
1919                            path, size_round(le32_to_cpu(pathlen)),
1920                            NULL, 0, NULL, 0);
1921
1922         BUFF_FREE(buffer);
1923         EXIT;
1924         return error;
1925 }
1926
1927 int
1928 presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
1929                     struct dentry *src, struct dentry *tgt,
1930                     struct presto_version *tgt_dir_ver,
1931                     struct presto_version *new_link_ver)
1932 {
1933         int opcode = KML_OPCODE_LINK;
1934         char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
1935         __u32 pathlen, srcpathlen;
1936         struct dentry *root;
1937         int error, size;
1938
1939         ENTRY;
1940         if ( presto_no_journal(fset) ) {
1941                 EXIT;
1942                 return 0;
1943         }
1944
1945         root = fset->fset_dentry;
1946
1947         BUFF_ALLOC(srcbuffer, NULL);
1948         srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
1949         srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
1950
1951         BUFF_ALLOC(buffer, srcbuffer);
1952         path = presto_path(tgt, root, buffer, PAGE_SIZE);
1953         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1954         size =  sizeof(__u32) * current->group_info->ngroups + 
1955                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1956                 sizeof(srcpathlen) + sizeof(pathlen) +
1957                 sizeof(struct kml_suffix);
1958
1959         if ( size > sizeof(record) )
1960                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1961
1962         rec->is_kml = 1;
1963         rec->size = size + size_round(le32_to_cpu(pathlen)) + 
1964                 size_round(le32_to_cpu(srcpathlen));
1965
1966         logrecord = journal_log_prefix(record, opcode, rec);
1967         logrecord = log_version(logrecord, tgt_dir_ver);
1968         logrecord = log_dentry_version(logrecord, tgt->d_parent);
1969         logrecord = log_version(logrecord, new_link_ver);
1970         logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
1971         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1972         logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
1973
1974         error = presto_log(fset, rec, record, size,
1975                            srcpath, size_round(le32_to_cpu(srcpathlen)),
1976                            path, size_round(le32_to_cpu(pathlen)),
1977                            NULL, 0);
1978
1979         BUFF_FREE(srcbuffer);
1980         BUFF_FREE(buffer);
1981         EXIT;
1982         return error;
1983 }
1984
1985
1986 int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
1987                           struct dentry *src, struct dentry *tgt,
1988                           struct presto_version *src_dir_ver,
1989                           struct presto_version *tgt_dir_ver)
1990 {
1991         int opcode = KML_OPCODE_RENAME;
1992         char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
1993         __u32 pathlen, srcpathlen;
1994         struct dentry *root;
1995         int error, size;
1996
1997         ENTRY;
1998         if ( presto_no_journal(fset) ) {
1999                 EXIT;
2000                 return 0;
2001         }
2002
2003         root = fset->fset_dentry;
2004
2005         BUFF_ALLOC(srcbuffer, NULL);
2006         srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
2007         srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
2008
2009         BUFF_ALLOC(buffer, srcbuffer);
2010         path = presto_path(tgt, root, buffer, PAGE_SIZE);
2011         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
2012         size =  sizeof(__u32) * current->group_info->ngroups + 
2013                 sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) +
2014                 sizeof(srcpathlen) + sizeof(pathlen) +
2015                 sizeof(struct kml_suffix);
2016
2017         if ( size > sizeof(record) )
2018                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2019
2020         rec->is_kml = 1;
2021         rec->size = size + size_round(le32_to_cpu(pathlen)) + 
2022                 size_round(le32_to_cpu(srcpathlen));
2023
2024         logrecord = journal_log_prefix(record, opcode, rec);
2025         logrecord = log_version(logrecord, src_dir_ver);
2026         logrecord = log_dentry_version(logrecord, src->d_parent);
2027         logrecord = log_version(logrecord, tgt_dir_ver);
2028         logrecord = log_dentry_version(logrecord, tgt->d_parent);
2029         logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
2030         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2031         logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
2032
2033         error = presto_log(fset, rec, record, size,
2034                            srcpath, size_round(le32_to_cpu(srcpathlen)),
2035                            path, size_round(le32_to_cpu(pathlen)),
2036                            NULL, 0);
2037
2038         BUFF_FREE(buffer);
2039         BUFF_FREE(srcbuffer);
2040         EXIT;
2041         return error;
2042 }
2043
2044 int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
2045                           struct dentry *dir, struct presto_version *tgt_dir_ver,
2046                           struct presto_version *old_file_ver,
2047                           struct izo_rollback_data *rb, struct dentry *dentry,
2048                           char *old_target, int old_targetlen)
2049 {
2050         int opcode = KML_OPCODE_UNLINK;
2051         char *buffer, *path, *logrecord, record[316];
2052         const char *name;
2053         __u32 pathlen, llen;
2054         struct dentry *root;
2055         int error, size, len;
2056
2057         ENTRY;
2058         if ( presto_no_journal(fset) ) {
2059                 EXIT;
2060                 return 0;
2061         }
2062
2063         root = fset->fset_dentry;
2064
2065         name = dentry->d_name.name;
2066         len = dentry->d_name.len;
2067
2068         llen = cpu_to_le32(len);
2069         BUFF_ALLOC(buffer, NULL);
2070         path = presto_path(dir, root, buffer, PAGE_SIZE);
2071         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
2072         size = sizeof(__u32) * current->group_info->ngroups + 
2073                 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
2074                 sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
2075                 sizeof(old_targetlen) + sizeof(struct kml_suffix);
2076
2077         if ( size > sizeof(record) )
2078                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2079
2080         rec->is_kml = 1;
2081         rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) +
2082                 size_round(old_targetlen);
2083
2084         logrecord = journal_log_prefix(record, opcode, rec);
2085         logrecord = log_version(logrecord, tgt_dir_ver);
2086         logrecord = log_dentry_version(logrecord, dir);
2087         logrecord = log_version(logrecord, old_file_ver);
2088         logrecord = log_rollback(logrecord, rb);
2089         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2090         logrecord = logit(logrecord, &llen, sizeof(llen));
2091         logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen));
2092         logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
2093
2094         error = presto_log(fset, rec, record, size,
2095                            path, size_round(le32_to_cpu(pathlen)),
2096                            name, size_round(len),
2097                            old_target, size_round(old_targetlen));
2098
2099         BUFF_FREE(buffer);
2100         EXIT;
2101         return error;
2102 }
2103
2104 int
2105 presto_journal_close(struct rec_info *rec, struct presto_file_set *fset,
2106                      struct presto_file_data *fd, struct dentry *dentry,
2107                      struct presto_version *old_file_ver,
2108                      struct presto_version *new_file_ver)
2109 {
2110         int opcode = KML_OPCODE_CLOSE;
2111         char *buffer, *path, *logrecord, record[316];
2112         struct dentry *root;
2113         int error, size, i;
2114         __u32 pathlen, generation;
2115         __u64 ino;
2116         __u32 open_fsuid;
2117         __u32 open_fsgid;
2118         __u32 open_ngroups;
2119         __u32 open_groups[NGROUPS_SMALL];
2120         __u32 open_mode;
2121         __u32 open_uid;
2122         __u32 open_gid;
2123
2124         ENTRY;
2125
2126         if ( presto_no_journal(fset) ) {
2127                 EXIT;
2128                 return 0;
2129         }
2130
2131         if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
2132             || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
2133                 EXIT;
2134                 return 0;
2135         }
2136
2137         root = fset->fset_dentry;
2138
2139         if (fd) {
2140                 open_ngroups = fd->fd_ngroups;
2141                 for (i = 0; i < fd->fd_ngroups; i++)
2142                         open_groups[i] = (__u32) fd->fd_groups[i];
2143                 open_mode = fd->fd_mode;
2144                 open_uid = fd->fd_uid;
2145                 open_gid = fd->fd_gid;
2146                 open_fsuid = fd->fd_fsuid;
2147                 open_fsgid = fd->fd_fsgid;
2148         } else {
2149                 open_ngroups = current->group_info->ngroups;
2150                 for (i=0; i<current->group_info->ngroups; i++)
2151                         open_groups[i] =  (__u32) GROUP_AT(current->group_info,i); 
2152                 open_mode = dentry->d_inode->i_mode;
2153                 open_uid = dentry->d_inode->i_uid;
2154                 open_gid = dentry->d_inode->i_gid;
2155                 open_fsuid = current->fsuid;
2156                 open_fsgid = current->fsgid;
2157         }
2158         BUFF_ALLOC(buffer, NULL);
2159         path = presto_path(dentry, root, buffer, PAGE_SIZE);
2160         pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
2161         ino = cpu_to_le64(dentry->d_inode->i_ino);
2162         generation = cpu_to_le32(dentry->d_inode->i_generation);
2163         size =  sizeof(__u32) * open_ngroups +
2164                 sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) +
2165                 sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) +
2166                 sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) +
2167                 sizeof(pathlen) + sizeof(struct kml_suffix);
2168
2169         if ( size > sizeof(record) )
2170                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2171
2172         rec->is_kml = 1;
2173         rec->size = size + size_round(le32_to_cpu(pathlen));
2174
2175         logrecord = journal_log_prefix_with_groups_and_ids(
2176                 record, opcode, rec, open_ngroups, open_groups,
2177                 open_fsuid, open_fsgid);
2178         logrecord = logit(logrecord, &open_mode, sizeof(open_mode));
2179         logrecord = logit(logrecord, &open_uid, sizeof(open_uid));
2180         logrecord = logit(logrecord, &open_gid, sizeof(open_gid));
2181         logrecord = log_version(logrecord, old_file_ver);
2182         logrecord = log_version(logrecord, new_file_ver);
2183         logrecord = logit(logrecord, &ino, sizeof(ino));
2184         logrecord = logit(logrecord, &generation, sizeof(generation));
2185         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2186         logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
2187
2188         error = presto_log(fset, rec, record, size,
2189                            path, size_round(le32_to_cpu(pathlen)),
2190                            NULL, 0, NULL, 0);
2191         BUFF_FREE(buffer);
2192
2193         EXIT;
2194         return error;
2195 }
2196
2197 int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset, 
2198                          char *path, __u32 pathlen, 
2199                          int ngroups, __u32 *groups, 
2200                          __u64 ino,     __u32 generation, 
2201                          struct presto_version *new_file_ver)
2202 {
2203         int opcode = KML_OPCODE_CLOSE;
2204         char *logrecord, record[292];
2205         struct dentry *root;
2206         int error, size;
2207
2208         ENTRY;
2209
2210         if ( presto_no_journal(fset) ) {
2211                 EXIT;
2212                 return 0;
2213         }
2214
2215         root = fset->fset_dentry;
2216
2217         size =  sizeof(__u32) * ngroups + 
2218                 sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
2219                 sizeof(ino) + sizeof(generation) + 
2220                 sizeof(le32_to_cpu(pathlen)) +
2221                 sizeof(struct kml_suffix);
2222
2223         if ( size > sizeof(record) )
2224                 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2225
2226         rec->is_kml = 1;
2227         rec->size = size + size_round(le32_to_cpu(pathlen));
2228
2229         logrecord = journal_log_prefix_with_groups(record, opcode, rec,
2230                                                    ngroups, groups);
2231         logrecord = log_version(logrecord, new_file_ver);
2232         logrecord = logit(logrecord, &ino, sizeof(ino));
2233         logrecord = logit(logrecord, &generation, sizeof(generation));
2234         logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2235         logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec);
2236
2237         error = presto_log(fset, rec, record, size,
2238                            path, size_round(le32_to_cpu(pathlen)),
2239                            NULL, 0, NULL, 0);
2240
2241         EXIT;
2242         return error;
2243 }
2244
2245
2246 /* write closes for the local close records in the LML */ 
2247 int presto_complete_lml(struct presto_file_set *fset)
2248 {
2249         __u32 groups[NGROUPS_SMALL];
2250         loff_t lml_offset;
2251         loff_t read_offset; 
2252         char *buffer;
2253         void *handle;
2254         struct rec_info rec;
2255         struct close_rec { 
2256                 struct presto_version new_file_ver;
2257                 __u64 ino;
2258                 __u32 generation;
2259                 __u32 pathlen;
2260                 __u64 remote_ino;
2261                 __u32 remote_generation;
2262                 __u32 remote_version;
2263                 __u64 lml_offset;
2264         } close_rec; 
2265         struct file *file = fset->fset_lml.fd_file;
2266         struct kml_prefix_hdr prefix;
2267         int rc = 0;
2268         ENTRY;
2269
2270         lml_offset = 0; 
2271  again: 
2272         if (lml_offset >= file->f_dentry->d_inode->i_size) {
2273                 EXIT;
2274                 return rc;
2275         }
2276
2277         read_offset = lml_offset;
2278         rc = presto_fread(file, (char *)&prefix,
2279                           sizeof(prefix), &read_offset);
2280         if ( rc != sizeof(prefix) ) {
2281                 EXIT;
2282                 CERROR("presto_complete_lml: ioerror - 1, tell Peter\n");
2283                 return -EIO;
2284         }
2285
2286         if ( prefix.opcode == KML_OPCODE_NOOP ) {
2287                 lml_offset += prefix.len; 
2288                 goto again; 
2289         }
2290
2291         rc = presto_fread(file, (char *)groups, 
2292                           prefix.ngroups * sizeof(__u32), &read_offset); 
2293         if ( rc != prefix.ngroups * sizeof(__u32) ) {
2294                 EXIT;
2295                 CERROR("presto_complete_lml: ioerror - 2, tell Peter\n");
2296                 return -EIO;
2297         }
2298
2299         rc = presto_fread(file, (char *)&close_rec, 
2300                           sizeof(close_rec), &read_offset); 
2301         if ( rc != sizeof(close_rec) ) {
2302                 EXIT;
2303                 CERROR("presto_complete_lml: ioerror - 3, tell Peter\n");
2304                 return -EIO;
2305         }
2306
2307         /* is this a backfetch or a close record? */ 
2308         if ( le64_to_cpu(close_rec.remote_ino) != 0 ) { 
2309                 lml_offset += prefix.len;
2310                 goto again; 
2311         }
2312
2313         BUFF_ALLOC(buffer, NULL);
2314         rc = presto_fread(file, (char *)buffer, 
2315                           le32_to_cpu(close_rec.pathlen), &read_offset); 
2316         if ( rc != le32_to_cpu(close_rec.pathlen) ) {
2317                 EXIT;
2318                 CERROR("presto_complete_lml: ioerror - 4, tell Peter\n");
2319                 return -EIO;
2320         }
2321         
2322         handle = presto_trans_start(fset, file->f_dentry->d_inode, 
2323                                     KML_OPCODE_RELEASE);
2324         if ( IS_ERR(handle) ) {
2325                 EXIT;
2326                 return -ENOMEM; 
2327         }
2328
2329         rc = presto_clear_lml_close(fset, lml_offset); 
2330         if ( rc ) {
2331                 CERROR("error during clearing: %d\n", rc);
2332                 presto_trans_commit(fset, handle);
2333                 EXIT; 
2334                 return rc; 
2335         }
2336
2337         rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen, 
2338                                   prefix.ngroups, groups, 
2339                                   close_rec.ino, close_rec.generation,
2340                                   &close_rec.new_file_ver); 
2341         if ( rc ) {
2342                 CERROR("error during rewrite close: %d\n", rc);
2343                 presto_trans_commit(fset, handle);
2344                 EXIT; 
2345                 return rc; 
2346         }
2347
2348         presto_trans_commit(fset, handle); 
2349         if ( rc ) { 
2350                 CERROR("error during truncation: %d\n", rc);
2351                 EXIT; 
2352                 return rc;
2353         }
2354         
2355         lml_offset += prefix.len; 
2356         CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset);
2357         goto again;
2358
2359         EXIT;
2360         return -EINVAL;
2361 }
2362
2363
#ifdef CONFIG_FS_EXT_ATTR
/* Journal an ea operation. A NULL buffer implies the attribute is 
 * getting deleted. In this case we simply change the opcode, but nothing
 * else is affected.
 *
 * The fixed part of the record (prefix, two versions, flags, mode, path
 * length, name length, value length, suffix) is assembled in the on-stack
 * array `record`.  Three variable chunks follow: the path of @dentry
 * relative to the fileset root, the attribute @name, and the value
 * @buffer.
 *
 * @rec:         record info; is_kml and size are filled in here
 * @fset:        fileset whose journal is written
 * @dentry:      object the attribute belongs to
 * @ver:         first version logged (the second is taken from @dentry)
 * @name:        attribute name; at most PRESTO_EXT_ATTR_NAME_MAX bytes of
 *               it are measured
 * @buffer:      attribute value, or NULL for a delete
 * @buffer_len:  length of @buffer; ignored (treated as 0) when @buffer is
 *               NULL
 * @flags:       logged as a little-endian 32-bit value
 *
 * Returns 0 without logging if presto_no_journal(fset) is true, or if the
 * dentry has no inode, has i_nlink == 0, or is unhashed and not its own
 * parent.  Returns -EOVERFLOW if the fixed part does not fit in `record`.
 * Otherwise returns the result of presto_log().
 */
int presto_journal_set_ext_attr (struct rec_info *rec, 
                                 struct presto_file_set *fset, 
                                 struct dentry *dentry, 
                                 struct presto_version *ver, const char *name, 
                                 const char *buffer, int buffer_len, 
                                 int flags) 
{
        int opcode = (buffer == NULL) ? 
                     KML_OPCODE_DELEXTATTR : 
                     KML_OPCODE_SETEXTATTR ;
        char *temp, *path, *logrecord, record[292];
        struct dentry *root;
        int error, size;
        __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX));
        __u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0);
        __u32 mode, pathlen;

        ENTRY;
        if ( presto_no_journal(fset) ) {
                EXIT;
                return 0;
        }

        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(temp, NULL);
        path = presto_path(dentry, root, temp, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(temp, path));

        flags=cpu_to_le32(flags);
        /* Ugly, but needed. posix ACLs change the mode without using
         * setattr, we need to record these changes. The EA code per se
         * is not really affected.
         */
        mode=cpu_to_le32(dentry->d_inode->i_mode);

        /* The size grows with the caller's group count, so it is not a
         * compile-time constant and must be checked against `record`. */
        size =  sizeof(__u32) * current->group_info->ngroups + 
                sizeof(struct kml_prefix_hdr) + 
                2 * sizeof(struct presto_version) +
                sizeof(flags) + sizeof(mode) + sizeof(namelen) + 
                sizeof(buflen) + sizeof(pathlen) + 
                sizeof(struct kml_suffix);

        if ( size > sizeof(record) ) {
                /* Refuse to journal rather than write past the end of the
                 * on-stack buffer. */
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        /* Make space for a path, a attr name and value*/
        /* We use the buflen instead of buffer_len to make sure that we 
         * journal the right length. This may be a little paranoid, but
         * with 64 bits round the corner, I would rather be safe than sorry!
         * Also this handles deletes with non-zero buffer_lengths correctly.
         * SHP
         */
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                    size_round(le32_to_cpu(namelen)) + 
                    size_round(le32_to_cpu(buflen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, ver);
        logrecord = log_dentry_version(logrecord, dentry);
        logrecord = logit(logrecord, &flags, sizeof(flags));
        logrecord = logit(logrecord, &mode, sizeof(mode));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &namelen, sizeof(namelen));
        logrecord = logit(logrecord, &buflen, sizeof(buflen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           name, size_round(le32_to_cpu(namelen)),
                           buffer, size_round(le32_to_cpu(buflen)));

 out:
        BUFF_FREE(temp);
        EXIT;
        return error;
}
#endif