1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 1998 Peter J. Braam
5 * Copyright (C) 2001 Cluster File Systems, Inc.
6 * Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
8 * Support for journalling extended attributes
9 * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
11 * This file is part of InterMezzo, http://www.inter-mezzo.org.
13 * InterMezzo is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
17 * InterMezzo is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with InterMezzo; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include <linux/types.h>
28 #include <linux/kernel.h>
29 #include <linux/sched.h>
31 #include <linux/namei.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/time.h>
35 #include <linux/errno.h>
36 #include <asm/segment.h>
37 #include <asm/uaccess.h>
38 #include <linux/string.h>
40 #include "intermezzo_fs.h"
41 #include "intermezzo_psdev.h"
/*
 * One outstanding reservation in a log file.  Entries are linked through
 * ri_list onto a presto_log_fd's fd_reservations list; ri_recno holds the
 * record number that was current when the space was reserved.
 */
43 struct presto_reservation_data {
44 unsigned int ri_recno;
47 struct list_head ri_list;
53 * write lock in struct presto_log_fd:
55 * - required for: accessing any field in a presto_log_fd
56 * - may not be held across I/O
62 * reserve record space and/or atomically request state of the log
63 * rec will hold the location of the reserved record upon return
64 * this reservation will be placed in the queue
/*
 * Reserve rec->size bytes at the current end of log `fd` and queue `rd` on
 * fd->fd_reservations.  All fd state is read and updated between the
 * write_lock()/write_unlock() pair on fd->fd_lock.  A record that would reach
 * the next chunk boundary (chunk size 1 << fset_chunkbits) moves fd_offset up
 * to that boundary first.  On return rec->offset and rec->recno describe the
 * reserved slot and fd->fd_offset has been advanced by rec->size.
 */
66 static void presto_reserve_record(struct presto_file_set *fset,
67 struct presto_log_fd *fd,
69 struct presto_reservation_data *rd)
71 int chunked_record = 0;
74 write_lock(&fd->fd_lock);
76 int chunk = 1 << fset->fset_chunkbits;
77 int chunk_mask = ~(chunk -1);
80 boundary = (fd->fd_offset + chunk - 1) & chunk_mask;
81 if ( fd->fd_offset + rec->size >= boundary ) {
83 fd->fd_offset = boundary;
89 /* this moves the fd_offset back after truncation */
90 if ( list_empty(&fd->fd_reservations) &&
92 fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size;
95 rec->offset = fd->fd_offset;
97 rec->offset += fset->fset_kml_logical_off;
99 rec->recno = fd->fd_recno;
101 /* add the reservation data to the end of the list */
102 rd->ri_offset = fd->fd_offset;
103 rd->ri_size = rec->size;
104 rd->ri_recno = rec->recno;
105 list_add(&rd->ri_list, fd->fd_reservations.prev);
107 fd->fd_offset += rec->size;
109 write_unlock(&fd->fd_lock);
/* Remove reservation `rd` from the log's reservation list under fd->fd_lock. */
114 static inline void presto_release_record(struct presto_log_fd *fd,
115 struct presto_reservation_data *rd)
117 write_lock(&fd->fd_lock);
118 list_del(&rd->ri_list);
119 write_unlock(&fd->fd_lock);
122 /* XXX should we ask for do_truncate to be exported? */
/*
 * Change the size of dentry's inode to `length`.  An iattr carrying
 * ATTR_SIZE | ATTR_CTIME is built and handed to the inode's own setattr
 * operation when it has one; inode_setattr() is also called, presumably as
 * the fallback branch -- confirm.  `size_check` is compared against the
 * current i_size before the attributes are built.
 * NOTE(review): the action taken on a size_check mismatch is not shown here.
 */
123 int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
124 loff_t length, loff_t size_check)
126 struct inode *inode = dentry->d_inode;
128 struct iattr newattrs;
140 if (size_check != inode->i_size) {
147 newattrs.ia_size = length;
148 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
150 if (inode->i_op && inode->i_op->setattr)
151 error = inode->i_op->setattr(dentry, &newattrs);
153 inode_setattr(dentry->d_inode, &newattrs);
/*
 * Ask userspace to truncate the KML via izo_upc_kml_truncate().
 * fd_truncating is tested and set under fset_kml.fd_lock, so a caller that
 * finds the flag already set just releases the lock.  The flag is cleared
 * again, under the lock, further down.
 * NOTE(review): the islento/permit-count diagnostic is printed with CERROR
 * although it does not look like an error report -- confirm whether CDEBUG
 * was intended.
 */
163 static void presto_kml_truncate(struct presto_file_set *fset)
168 write_lock(&fset->fset_kml.fd_lock);
169 if (fset->fset_kml.fd_truncating == 1 ) {
170 write_unlock(&fset->fset_kml.fd_lock);
175 fset->fset_kml.fd_truncating = 1;
176 write_unlock(&fset->fset_kml.fd_lock);
178 CERROR("islento: %d, count: %d\n",
179 ISLENTO(presto_i2m(fset->fset_dentry->d_inode)),
180 fset->fset_permit_count);
182 rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor,
183 fset->fset_lento_off, fset->fset_lento_recno,
186 /* Userspace is the only permitholder now, and will retain an exclusive
187 * hold on the permit until KML truncation completes. */
188 /* FIXME: double check this code path now that the precise semantics of
189 * fset->fset_permit_count have changed. */
192 write_lock(&fset->fset_kml.fd_lock);
193 fset->fset_kml.fd_truncating = 0;
194 write_unlock(&fset->fset_kml.fd_lock);
/*
 * Start a transaction by delegating to the tr_start hook of the cache
 * filter's o_trops table.  A missing o_trops table is checked for first.
 */
200 void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
204 if ( !fset->fset_cache->cache_filter->o_trops ) {
209 return fset->fset_cache->cache_filter->o_trops->tr_start
/*
 * Commit `handle` through the filter's tr_commit hook, then check whether the
 * KML has grown past fset->kml_truncate_size and, if no truncation is already
 * in progress, start one with presto_kml_truncate().
 * NOTE(review): fd_truncating and fd_offset are read here without taking
 * fd_lock -- confirm this unlocked peek is intentional.
 */
213 void presto_trans_commit(struct presto_file_set *fset, void *handle)
216 if (!fset->fset_cache->cache_filter->o_trops ) {
221 fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle);
223 /* Check to see if the KML needs to be truncated. */
224 if (fset->kml_truncate_size > 0 &&
225 !fset->fset_kml.fd_truncating &&
226 fset->fset_kml.fd_offset > fset->kml_truncate_size) {
227 CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n",
228 (unsigned long)fset->fset_kml.fd_offset);
229 presto_kml_truncate(fset);
/* Return the uc_no_journal flag of the izo channel used by this fileset's cache. */
234 inline int presto_no_journal(struct presto_file_set *fset)
236 int minor = fset->fset_cache->cache_psdev->uc_minor;
237 return izo_channels[minor].uc_no_journal;
/* Round x up to the next multiple of 4. */
240 #define size_round(x) (((x)+3) & ~0x3)
/*
 * Page-sized scratch buffers: BUFF_FREE releases PAGE_SIZE bytes through
 * PRESTO_FREE; BUFF_ALLOC begins with a PAGE_SIZE PRESTO_ALLOC into newbuf.
 * NOTE(review): BUFF_ALLOC expands to more than one statement, so it is only
 * safe where a full statement sequence is allowed -- confirm all call sites.
 */
242 #define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE)
243 #define BUFF_ALLOC(newbuf, oldbuf) \
244 PRESTO_ALLOC(newbuf, PAGE_SIZE); \
252 * "buflen" should be PAGE_SIZE or more.
253 * Give the path relative to a fsetroot
/*
 * Assemble the path of `dentry` inside `buffer`.  `end` starts at
 * buffer + buflen; component names are copied at `end` while the walk
 * follows d_parent, and a dentry that is its own parent is tested for.
 * buflen is reduced by namelen + 1 for every component.  An unhashed dentry
 * that is not its own parent gets the 10-byte text " (deleted)".
 */
255 char * presto_path(struct dentry *dentry, struct dentry *root,
256 char *buffer, int buflen)
258 char * end = buffer+buflen;
263 if (dentry->d_parent != dentry && d_unhashed(dentry)) {
266 memcpy(end, " (deleted)", 10);
274 struct dentry * parent;
279 parent = dentry->d_parent;
280 if (dentry == parent)
282 namelen = dentry->d_name.len;
283 buflen -= namelen + 1;
287 memcpy(end, dentry->d_name.name, namelen);
/*
 * Copy `size` bytes from `value` into the record buffer at `buf`.
 * NOTE(review): callers assign the result back to their cursor
 * (buf = logit(buf, ...)), so it presumably returns the advanced pointer.
 */
295 static inline char *logit(char *buf, const void *value, int size)
297 char *ptr = (char *)value;
299 memcpy(buf, ptr, size);
/*
 * Fill in a kml_prefix_hdr (record length, version, pid, uids, group count
 * and opcode; everything except version goes through cpu_to_le32) and append
 * it to `buf`, followed by `ngroups` little-endian 32-bit group ids.
 * NOTE(review): loggroups[] has NGROUPS_SMALL entries while the copy loop
 * runs to the caller-supplied ngroups; no bound check is visible -- confirm.
 */
306 journal_log_prefix_with_groups_and_ids(char *buf, int opcode,
307 struct rec_info *rec,
308 __u32 ngroups, gid_t *groups,
309 __u32 fsuid, __u32 fsgid)
311 struct kml_prefix_hdr p;
312 u32 loggroups[NGROUPS_SMALL];
316 p.len = cpu_to_le32(rec->size);
317 p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION;
318 p.pid = cpu_to_le32(current->pid);
319 p.auid = cpu_to_le32(current->uid);
320 p.fsuid = cpu_to_le32(fsuid);
321 p.fsgid = cpu_to_le32(fsgid);
322 p.ngroups = cpu_to_le32(ngroups);
323 p.opcode = cpu_to_le32(opcode);
324 for (i=0 ; i < ngroups ; i++)
325 loggroups[i] = cpu_to_le32((__u32) groups[i]);
327 buf = logit(buf, &p, sizeof(struct kml_prefix_hdr));
328 buf = logit(buf, &loggroups, sizeof(__u32) * ngroups);
/*
 * Prefix writer for the common case: copies the current task's supplementary
 * groups out of current->group_info and passes current->fsuid and
 * current->fsgid on to journal_log_prefix_with_groups_and_ids().
 * NOTE(review): groups[] holds NGROUPS_SMALL entries but the loop bound is
 * group_info->ngroups; no clamp is visible -- confirm it cannot overflow.
 */
333 journal_log_prefix(char *buf, int opcode, struct rec_info *rec)
335 __u32 groups[NGROUPS_SMALL];
338 /* convert 16 bit gid's to 32 bit gid's */
339 for (i=0; i<current->group_info->ngroups; i++)
340 groups[i] = GROUP_AT(current->group_info,i);
342 return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
343 (__u32)current->group_info->ngroups,
345 (__u32)current->fsuid,
346 (__u32)current->fsgid);
/*
 * Prefix writer taking a caller-supplied group list; the ids are still
 * current->fsuid and current->fsgid.
 */
350 journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec,
351 __u32 ngroups, gid_t *groups)
353 return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
355 (__u32)current->fsuid,
356 (__u32)current->fsgid);
/*
 * Fetch the version of dentry's inode with presto_getversion(), convert its
 * time fields with HTON__u32 and its size with HTON__u64, and append the
 * result to `buf`.
 */
359 static inline char *log_dentry_version(char *buf, struct dentry *dentry)
361 struct presto_version version;
363 presto_getversion(&version, dentry->d_inode);
365 version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
366 version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
367 version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
368 version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
369 version.pv_size = HTON__u64(version.pv_size);
371 return logit(buf, &version, sizeof(version));
/*
 * Append *pv to `buf`.  The field conversion is done on a local copy, so the
 * caller's structure is left untouched.
 */
374 static inline char *log_version(char *buf, struct presto_version *pv)
376 struct presto_version version;
378 memcpy(&version, pv, sizeof(version));
380 version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
381 version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
382 version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
383 version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
384 version.pv_size = HTON__u64(version.pv_size);
386 return logit(buf, &version, sizeof(version));
/*
 * Append the rollback data to `buf`, converting each field into a local copy
 * first: rb_mode and rb_rdev through HTON__u32, rb_uid and rb_gid through
 * HTON__u64.
 */
389 static inline char *log_rollback(char *buf, struct izo_rollback_data *rb)
391 struct izo_rollback_data rollback;
393 rollback.rb_mode = HTON__u32(rb->rb_mode);
394 rollback.rb_rdev = HTON__u32(rb->rb_rdev);
395 rollback.rb_uid = HTON__u64(rb->rb_uid);
396 rollback.rb_gid = HTON__u64(rb->rb_gid);
398 return logit(buf, &rollback, sizeof(rollback));
/*
 * Append the kml_suffix that ends a record.  s.prevrec is the distance from
 * the dentry's previous KML record (dd_kml_offset) to rec->offset, after
 * which dd_kml_offset is updated; s.time is the current time in seconds.
 * s.recno is not set here: the record number is only known after
 * reservation.
 */
401 static inline char *journal_log_suffix(char *buf, char *log,
402 struct presto_file_set *fset,
403 struct dentry *dentry,
404 struct rec_info *rec)
407 struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log;
410 /* XXX needs to be done after reservation,
411 disable this until version 1.2 */
413 s.prevrec = cpu_to_le32(rec->offset -
414 presto_d2d(dentry)->dd_kml_offset);
415 presto_d2d(dentry)->dd_kml_offset = rec->offset;
422 /* record number needs to be filled in after reservation
423 s.recno = cpu_to_le32(rec->recno); */
424 s.time = cpu_to_le32(get_seconds());
426 return logit(buf, &s, sizeof(s));
/*
 * Close the file behind `logfd` with filp_close() and clear fd_file.  A
 * missing file and a failing close are both reported with CERROR.
 */
429 int izo_log_close(struct presto_log_fd *logfd)
433 if (logfd->fd_file) {
434 rc = filp_close(logfd->fd_file, 0);
435 logfd->fd_file = NULL;
437 CERROR("InterMezzo: %s: no filp\n", __FUNCTION__);
439 CERROR("InterMezzo: close files: filp won't close: %d\n", rc);
/*
 * Write `len` bytes from `str` at *off through file->f_op->write, after
 * checking that f_op and its write method exist.  A byte-count mismatch is
 * reported with CERROR.
 */
444 int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
461 if ( ! file->f_op ) {
466 if ( ! file->f_op->write ) {
473 rc = file->f_op->write(file, str, len, off);
475 CERROR("presto_fwrite: wrote %d bytes instead of "
476 "%d at %ld\n", rc, len, (long)*off);
/*
 * Read `len` bytes into `str` from *off through file->f_op->read, after
 * checking that f_op and its read method exist.  A byte-count mismatch is
 * noted with CDEBUG.
 * NOTE(review): the first message (offset, length, inode number) is emitted
 * with CERROR; the condition guarding it is not shown -- confirm.
 */
484 int presto_fread(struct file *file, char *str, int len, loff_t *off)
491 CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
492 *off, len, file->f_dentry->d_inode->i_ino);
505 if ( ! file->f_op ) {
510 if ( ! file->f_op->read ) {
517 rc = file->f_op->read(file, str, len, off);
519 CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
520 "%d at %Ld\n", rc, len, *off);
/*
 * Under fd_lock, compute the largest valid KML offset: the ri_offset of the
 * first queued reservation when there is one; the size of the KML file is
 * the other source.
 */
528 loff_t presto_kml_offset(struct presto_file_set *fset)
530 unsigned int kml_recno;
531 struct presto_log_fd *fd = &fset->fset_kml;
535 write_lock(&fd->fd_lock);
537 /* Determine the largest valid offset, i.e. up until the first
538 * reservation held on the file. */
539 if ( !list_empty(&fd->fd_reservations) ) {
540 struct presto_reservation_data *rd;
541 rd = list_entry(fd->fd_reservations.next,
542 struct presto_reservation_data,
544 offset = rd->ri_offset;
545 kml_recno = rd->ri_recno;
547 offset = fd->fd_file->f_dentry->d_inode->i_size;
548 kml_recno = fset->fset_kml.fd_recno;
550 write_unlock(&fd->fd_lock);
/*
 * Decide whether to notify userspace about new KML records.  The valid end
 * of the KML is determined the same way as in presto_kml_offset().  Its
 * record number is compared with fset_lento_recno, and the lock is released
 * in each of the branches for "behind" (reported as an error), "equal" and
 * "fewer than 100 records ahead".  izo_upc_kml() performs the upcall, and
 * fset_lento_off / fset_lento_recno are set to the offset and record number
 * just determined.
 * NOTE(review): the upcall receives offset + fset_kml_logical_off, whereas
 * fset_lento_off is stored as plain `offset` -- confirm both values are
 * meant to be in the same units.
 */
554 static int presto_kml_dispatch(struct presto_file_set *fset)
557 unsigned int kml_recno;
558 struct presto_log_fd *fd = &fset->fset_kml;
562 write_lock(&fd->fd_lock);
564 /* Determine the largest valid offset, i.e. up until the first
565 * reservation held on the file. */
566 if ( !list_empty(&fd->fd_reservations) ) {
567 struct presto_reservation_data *rd;
568 rd = list_entry(fd->fd_reservations.next,
569 struct presto_reservation_data,
571 offset = rd->ri_offset;
572 kml_recno = rd->ri_recno;
574 offset = fd->fd_file->f_dentry->d_inode->i_size;
575 kml_recno = fset->fset_kml.fd_recno;
578 if ( kml_recno < fset->fset_lento_recno ) {
579 CERROR("presto_kml_dispatch: smoke is coming\n");
580 write_unlock(&fd->fd_lock);
583 } else if ( kml_recno == fset->fset_lento_recno ) {
584 write_unlock(&fd->fd_lock);
587 /* XXX add a further "if" here to delay the KML upcall */
589 } else if ( kml_recno < fset->fset_lento_recno + 100) {
590 write_unlock(&fd->fd_lock);
595 CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name);
597 rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor,
598 fset->fset_lento_off, fset->fset_lento_recno,
599 offset + fset->fset_kml_logical_off, kml_recno,
603 write_unlock(&fd->fd_lock);
608 fset->fset_lento_off = offset;
609 fset->fset_lento_recno = kml_recno;
610 write_unlock(&fd->fd_lock);
/* Resolve `path` with path_lookup() using LOOKUP_PARENT, filling in `nd`. */
615 int izo_lookup_file(struct presto_file_set *fset, char *path,
616 struct nameidata *nd)
620 CDEBUG(D_CACHE, "looking up: %s\n", path);
622 error = path_lookup(path, LOOKUP_PARENT, nd);
631 /* FIXME: this function is a mess of locking and error handling. There's got to
632 * be a better way. */
/*
 * Rename `oldname` to `newname` inside the fileset.  Both names are expanded
 * with izo_make_path(), their parents are looked up with izo_lookup_file(),
 * the two directories are locked with lock_rename(), the entries are
 * resolved with lookup_hash() and presto_rename() performs the move.  The
 * tail of the function releases the rename lock, both nameidata and both
 * path strings (freed with strlen(path) + 1).
 */
633 static int do_truncate_rename(struct presto_file_set *fset, char *oldname,
636 struct dentry *old_dentry, *new_dentry;
637 struct nameidata oldnd, newnd;
638 char *oldpath, *newpath;
643 oldpath = izo_make_path(fset, oldname);
644 if (oldpath == NULL) {
649 newpath = izo_make_path(fset, newname);
650 if (newpath == NULL) {
656 if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) {
661 if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) {
666 lock_rename(newnd.dentry, oldnd.dentry);
667 old_dentry = lookup_hash(&oldnd.last, oldnd.dentry);
668 error = PTR_ERR(old_dentry);
669 if (IS_ERR(old_dentry)) {
674 if (!old_dentry->d_inode) {
678 new_dentry = lookup_hash(&newnd.last, newnd.dentry);
679 error = PTR_ERR(new_dentry);
680 if (IS_ERR(new_dentry)) {
686 extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry,
687 struct inode *new_dir,struct dentry *new_dentry);
688 error = presto_rename(old_dentry->d_parent->d_inode, old_dentry,
689 new_dentry->d_parent->d_inode, new_dentry);
697 unlock_rename(newnd.dentry, oldnd.dentry);
698 path_release(&newnd);
700 path_release(&oldnd);
702 PRESTO_FREE(newpath, strlen(newpath) + 1);
704 PRESTO_FREE(oldpath, strlen(oldpath) + 1);
708 /* This function is called with the fset->fset_kml.fd_lock held */
/*
 * Second half of KML truncation.  The function backs off while reservations
 * are queued on the KML.  presto_copy_kml_tail() produces the new log; then,
 * inside one KML_OPCODE_KML_TRUNC transaction, 'kml' and 'sml' are unlinked,
 * 'kml_tmp' and 'sml_tmp' are renamed into their place and the new logical
 * offset is recorded with presto_write_kml_logical_offset().  On success the
 * old KML file is closed and replaced by the copy, fd_offset is reduced by
 * `offset` and fd_truncating is cleared.
 * NOTE(review): kmlpath and smlpath are freed with a length computed from
 * "/.intermezzo/" + fset_name + "sml", whereas do_truncate_rename() frees
 * izo_make_path() results with strlen(path) + 1 -- confirm the sizes agree.
 * NOTE(review): the results of the two do_truncate_rename() calls only feed
 * CERROR messages in the lines shown -- confirm failures are handled.
 */
709 int presto_finish_kml_truncate(struct presto_file_set *fset,
710 unsigned long int offset)
712 struct lento_vfs_context info;
715 struct dentry *dentry;
718 char *kmlpath = NULL, *smlpath = NULL;
722 /* Lento couldn't do what it needed to; abort the truncation. */
723 fset->fset_kml.fd_truncating = 0;
728 /* someone is about to write to the end of the KML; try again later. */
729 if ( !list_empty(&fset->fset_kml.fd_reservations) ) {
734 f = presto_copy_kml_tail(fset, offset);
740 /* In a single transaction:
743 * - rename 'kml_tmp' to 'kml'
745 * - rename 'sml_tmp' to 'sml'
746 * - rewrite the first record of last_rcvd with the new kml
749 handle = presto_trans_start(fset, fset->fset_dentry->d_inode,
750 KML_OPCODE_KML_TRUNC);
751 if (IS_ERR(handle)) {
752 presto_release_space(fset->fset_cache, PRESTO_REQLOW);
753 CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n");
758 memset(&info, 0, sizeof(info));
759 info.flags = LENTO_FL_IGNORE_TIME;
761 kmlpath = izo_make_path(fset, "kml");
762 if (kmlpath == NULL) {
764 CERROR("make_path failed: ENOMEM\n");
769 if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) {
770 CERROR("izo_lookup_file(kml) failed: %d.\n", error);
774 down(&nd.dentry->d_inode->i_sem);
775 dentry = lookup_hash(&nd.last, nd.dentry);
776 error = PTR_ERR(dentry);
777 if (IS_ERR(dentry)) {
778 up(&nd.dentry->d_inode->i_sem);
780 CERROR("lookup_hash failed\n");
784 error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
786 up(&nd.dentry->d_inode->i_sem);
790 CERROR("presto_do_unlink(kml) failed: %d.\n", error);
795 smlpath = izo_make_path(fset, "sml");
796 if (smlpath == NULL) {
798 CERROR("make_path() failed: ENOMEM\n");
803 if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) {
804 CERROR("izo_lookup_file(sml) failed: %d.\n", error);
808 down(&nd.dentry->d_inode->i_sem);
809 dentry = lookup_hash(&nd.last, nd.dentry);
810 error = PTR_ERR(dentry);
811 if (IS_ERR(dentry)) {
812 up(&nd.dentry->d_inode->i_sem);
814 CERROR("lookup_hash failed\n");
818 error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
820 up(&nd.dentry->d_inode->i_sem);
824 CERROR("presto_do_unlink(sml) failed: %d.\n", error);
829 error = do_truncate_rename(fset, "kml_tmp", "kml");
831 CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error);
832 error = do_truncate_rename(fset, "sml_tmp", "sml");
834 CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error);
836 /* Write a new 'last_rcvd' record with the new KML offset */
837 fset->fset_kml_logical_off += offset;
838 CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n",
839 fset->fset_kml_logical_off);
840 if (presto_write_kml_logical_offset(fset) != 0) {
841 CERROR("presto_write_kml_logical_offset failed\n");
844 presto_trans_commit(fset, handle);
846 /* Everything was successful, so swap the KML file descriptors */
847 filp_close(fset->fset_kml.fd_file, NULL);
848 fset->fset_kml.fd_file = f;
849 fset->fset_kml.fd_offset -= offset;
850 fset->fset_kml.fd_truncating = 0;
856 presto_trans_commit(fset, handle);
857 len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml");
859 PRESTO_FREE(kmlpath, len);
861 PRESTO_FREE(smlpath, len);
865 /* structure of an extended log record:
867 buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix
869 note: moves offset forward
/*
 * Write one record at *off: first the leading
 * size - sizeof(struct kml_suffix) bytes of `buf`, then each of
 * string1..string3 that is non-NULL with a non-zero length, and finally the
 * suffix taken from the tail of `buf`.  Every piece goes through
 * presto_fwrite() with the same `off`; a failed piece is reported with
 * CERROR.
 */
871 static inline int presto_write_record(struct file *f, loff_t *off,
872 const char *buf, size_t size,
873 const char *string1, int len1,
874 const char *string2, int len2,
875 const char *string3, int len3)
880 prefix_size = size - sizeof(struct kml_suffix);
881 rc = presto_fwrite(f, buf, prefix_size, off);
882 if ( rc != prefix_size ) {
883 CERROR("Write error!\n");
888 if ( string1 && len1 ) {
889 rc = presto_fwrite(f, string1, len1, off);
891 CERROR("Write error!\n");
897 if ( string2 && len2 ) {
898 rc = presto_fwrite(f, string2, len2, off);
900 CERROR("Write error!\n");
906 if ( string3 && len3 ) {
907 rc = presto_fwrite(f, string3, len3, off);
909 CERROR("Write error!\n");
915 rc = presto_fwrite(f, buf + prefix_size,
916 sizeof(struct kml_suffix), off);
917 if ( rc != sizeof(struct kml_suffix) ) {
918 CERROR("Write error!\n");
927 * rec->size must be valid prior to calling this function.
929 * had to export this for branch_reinter in kml_reint.c
/*
 * Append a record to the KML or the LML (presumably selected by
 * rec->is_kml).  Space is reserved with presto_reserve_record(), the
 * now-known record number is patched into the suffix at the end of `buf`,
 * the record is written with presto_write_record() and the reservation is
 * released.  A logical record offset is turned into a file offset by
 * subtracting fset_kml_logical_off.  presto_kml_dispatch() is called after
 * the release.
 * NOTE(review): `buf` is declared const char * but the suffix is written
 * through a cast pointer into it -- confirm callers never pass const storage.
 */
931 int presto_log(struct presto_file_set *fset, struct rec_info *rec,
932 const char *buf, size_t size,
933 const char *string1, int len1,
934 const char *string2, int len2,
935 const char *string3, int len3)
938 struct presto_reservation_data rd;
940 struct presto_log_fd *fd;
941 struct kml_suffix *s;
946 /* buf is NULL when no_journal is in effect */
953 fd = &fset->fset_kml;
955 fd = &fset->fset_lml;
958 presto_reserve_record(fset, fd, rec, &rd);
961 if (rec->offset < fset->fset_kml_logical_off) {
962 CERROR("record with pre-trunc offset. tell phil.\n");
965 offset = rec->offset - fset->fset_kml_logical_off;
967 offset = rec->offset;
970 /* now we know the record number */
971 prefix_size = size - sizeof(struct kml_suffix);
972 s = (struct kml_suffix *) (buf + prefix_size);
973 s->recno = cpu_to_le32(rec->recno);
975 rc = presto_write_record(fd->fd_file, &offset, buf, size,
976 string1, len1, string2, len2, string3, len3);
978 CERROR("presto: error writing record to %s\n",
979 rec->is_kml ? "KML" : "LML");
982 presto_release_record(fd, &rd);
984 rc = presto_kml_dispatch(fset);
990 /* read from the record at tail */
/*
 * Locate the last record before `tail`.  A tail shorter than one prefix
 * header plus one suffix is special-cased.  The loop steps backwards from
 * the tail, reading sizeof(data) bytes at a time with presto_fread(); after
 * it, the record's kml_suffix is read and *recno and *tail_offset are filled
 * in.  A suffix length above 500 triggers a warning.
 * NOTE(review): *recno is taken from suffix.recno without an endian
 * conversion although presto_log() stores it with cpu_to_le32 -- confirm.
 */
991 static int presto_last_record(struct presto_log_fd *fd, loff_t *size,
992 loff_t *tail_offset, __u32 *recno, loff_t tail)
994 struct kml_suffix suffix;
1002 if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) {
1007 zeroes = tail - sizeof(int);
1008 while ( zeroes >= 0 ) {
1010 rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data),
1012 if ( rc != sizeof(data) ) {
1018 zeroes -= 2 * sizeof(data);
1021 /* zeroes at the begining of file. this is needed to prevent
1022 presto_fread errors -SHP
1024 if (zeroes <= 0) return 0;
1026 zeroes -= sizeof(suffix) + sizeof(int);
1027 rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes);
1028 if ( rc != sizeof(suffix) ) {
1032 if ( suffix.len > 500 ) {
1033 CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n",
1034 (long) zeroes, (long)*tail_offset, suffix.len);
1037 *recno = suffix.recno;
1039 *tail_offset = zeroes;
/*
 * Initialise logfd->fd_offset and logfd->fd_recno from the last record found
 * by presto_last_record(), searching back from the current size of the log
 * file.
 */
1043 static int izo_kml_last_recno(struct presto_log_fd *logfd)
1049 loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size;
1051 rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail);
1057 logfd->fd_offset = tail_offset;
1058 logfd->fd_recno = recno;
1059 CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n",
1060 recno, tail_offset);
/*
 * Open log file `name` in the fileset with izo_fset_open() (mode 0644) and
 * check that its inode belongs to the fileset's own cache; on a mismatch the
 * file is closed again.  When the filter provides a tr_journal_data hook it
 * is invoked on the file's inode; a warning about missing file data logging
 * is the alternative.
 * NOTE(review): the cache-mismatch path clears fset->fset_kml.fd_file no
 * matter which log is being opened -- confirm that is intended.
 */
1065 struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags)
1067 struct presto_cache *cache = fset->fset_cache;
1072 f = izo_fset_open(fset, name, flags, 0644);
1080 if ( cache != presto_get_cache(f->f_dentry->d_inode) ) {
1081 CERROR("InterMezzo: %s cache does not match fset cache!\n",name);
1082 fset->fset_kml.fd_file = NULL;
1083 filp_close(f, NULL);
1089 if (cache->cache_filter && cache->cache_filter->o_trops &&
1090 cache->cache_filter->o_trops->tr_journal_data) {
1091 cache->cache_filter->o_trops->tr_journal_data
1092 (f->f_dentry->d_inode);
1094 CERROR("InterMezzo WARNING: no file data logging!\n");
/*
 * Set up the KML descriptor.  An already-open descriptor is detected first.
 * Otherwise the lock and the reservation list are initialised, 'kml' is
 * opened (O_RDWR | O_CREAT), fd_offset / fd_recno are recovered with
 * izo_kml_last_recno() and fset_lento_off / fset_lento_recno are seeded from
 * them.  The error path shown closes the file and clears fd_file.
 */
1102 int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1108 if (logfd->fd_file) {
1109 CDEBUG(D_INODE, "fset already has KML open\n");
1114 logfd->fd_lock = RW_LOCK_UNLOCKED;
1115 INIT_LIST_HEAD(&logfd->fd_reservations);
1116 f = izo_log_open(fset, "kml", O_RDWR | O_CREAT);
1123 error = izo_kml_last_recno(logfd);
1126 logfd->fd_file = NULL;
1127 filp_close(f, NULL);
1128 CERROR("InterMezzo: IO error in KML of fset %s\n",
1133 fset->fset_lento_off = logfd->fd_offset;
1134 fset->fset_lento_recno = logfd->fd_recno;
/*
 * Set up the last_rcvd descriptor: initialise lock and reservation list,
 * open 'last_rcvd' (O_RDWR | O_CREAT), set fd_offset to the file size and
 * call izo_rep_cache_init().  fset_kml_logical_off is loaded from the file
 * when presto_read_kml_logical_offset() returns 0; the other path sets it to
 * 0 and writes it out.
 * NOTE(review): the result of that presto_write_kml_logical_offset() call is
 * not used in the lines shown -- confirm.
 */
1140 int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1144 struct rec_info recinfo;
1147 if (logfd->fd_file != NULL) {
1148 CDEBUG(D_INODE, "fset already has last_rcvd open\n");
1153 logfd->fd_lock = RW_LOCK_UNLOCKED;
1154 INIT_LIST_HEAD(&logfd->fd_reservations);
1155 f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT);
1162 logfd->fd_offset = f->f_dentry->d_inode->i_size;
1164 error = izo_rep_cache_init(fset);
1166 if (presto_read_kml_logical_offset(&recinfo, fset) == 0) {
1167 fset->fset_kml_logical_off = recinfo.offset;
1169 /* The 'last_rcvd' file doesn't contain a kml offset record,
1170 * probably because we just created 'last_rcvd'. Write one. */
1171 fset->fset_kml_logical_off = 0;
1172 presto_write_kml_logical_offset(fset);
/*
 * Set up the LML descriptor: initialise lock and reservation list, open
 * 'lml' (O_RDWR | O_CREAT) and set fd_offset to the file size.  An
 * already-open descriptor is detected first.
 */
1179 int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
1185 if (logfd->fd_file) {
1186 CDEBUG(D_INODE, "fset already has lml open\n");
1191 logfd->fd_lock = RW_LOCK_UNLOCKED;
1192 INIT_LIST_HEAD(&logfd->fd_reservations);
1193 f = izo_log_open(fset, "lml", O_RDWR | O_CREAT);
1200 logfd->fd_offset = f->f_dentry->d_inode->i_size;
1206 /* Get the KML-offset record from the last_rcvd file */
/*
 * Look up the record for the all-zero uuid with izo_rcvd_get() and copy its
 * lr_local_offset into recinfo->offset.
 * NOTE(review): presto_write_kml_logical_offset() stores lr_local_offset
 * with cpu_to_le64, but it is copied here without a conversion in the lines
 * shown -- confirm.
 */
1207 int presto_read_kml_logical_offset(struct rec_info *recinfo,
1208 struct presto_file_set *fset)
1211 struct izo_rcvd_rec rec;
1212 char uuid[16] = {0};
1214 off = izo_rcvd_get(&rec, fset, uuid);
1218 recinfo->offset = rec.lr_local_offset;
/*
 * Store fset->fset_kml_logical_off (little-endian) in the lr_local_offset of
 * the all-zero-uuid record and write it with izo_rcvd_write().  The existing
 * record is fetched first; the memset() presumably covers the case where
 * none exists yet -- confirm.
 */
1222 int presto_write_kml_logical_offset(struct presto_file_set *fset)
1225 struct izo_rcvd_rec rec;
1226 char uuid[16] = {0};
1228 rc = izo_rcvd_get(&rec, fset, uuid);
1230 memset(&rec, 0, sizeof(rec));
1232 rec.lr_local_offset =
1233 cpu_to_le64(fset->fset_kml_logical_off);
1235 return izo_rcvd_write(fset, &rec);
/*
 * Copy the KML contents between `start` and fset_kml.fd_offset into
 * 'kml_tmp', at most sizeof(buf) bytes per presto_fread()/presto_fwrite()
 * round.  A short write closes 'kml_tmp' and returns ERR_PTR(-EIO).  The
 * caller in presto_finish_kml_truncate() installs the result as the new KML
 * file.
 */
1238 struct file * presto_copy_kml_tail(struct presto_file_set *fset,
1239 unsigned long int start)
1243 loff_t read_off, write_off, bytes;
1247 /* Copy the tail of 'kml' to 'kml_tmp' */
1248 f = izo_log_open(fset, "kml_tmp", O_RDWR);
1256 bytes = fset->fset_kml.fd_offset - start;
1261 if (bytes > sizeof(buf))
1262 toread = sizeof(buf);
1266 len = presto_fread(fset->fset_kml.fd_file, buf, toread,
1271 if (presto_fwrite(f, buf, len, &write_off) != len) {
1272 filp_close(f, NULL);
1274 return ERR_PTR(-EIO);
1285 /* LML records here */
1286 /* this writes an LML record to the LML file (rec->is_kml =0) */
/*
 * Build and log a KML_OPCODE_CLOSE record for `file`.  Nothing is built when
 * presto_no_journal() is set.  The record consists of the prefix, the new
 * file version, inode number, generation, path length, remote inode number
 * and generation, the remote version, rec->offset and the suffix; the path
 * itself is handed to presto_log() as the first string, padded with
 * size_round().
 * NOTE(review): the size sum uses sizeof(remote_version), which is the size
 * of a pointer, while log_version() appends a whole struct presto_version
 * (compare sizeof(*new_file_ver) in the same sum) -- looks undersized;
 * confirm.
 * NOTE(review): the size > sizeof(record) check only logs in the lines
 * shown -- confirm the function does not continue into an overflow.
 */
1287 int presto_write_lml_close(struct rec_info *rec,
1288 struct presto_file_set *fset,
1291 __u64 remote_generation,
1292 struct presto_version *remote_version,
1293 struct presto_version *new_file_ver)
1295 int opcode = KML_OPCODE_CLOSE;
1297 struct dentry *dentry = file->f_dentry;
1305 struct dentry *root;
1310 if ( presto_no_journal(fset) ) {
1314 root = fset->fset_dentry;
1316 BUFF_ALLOC(buffer, NULL);
1317 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1318 CDEBUG(D_INODE, "Path: %s\n", path);
1319 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1320 ino = cpu_to_le64(dentry->d_inode->i_ino);
1321 generation = cpu_to_le32(dentry->d_inode->i_generation);
1322 size = sizeof(__u32) * current->group_info->ngroups +
1323 sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
1324 sizeof(ino) + sizeof(generation) + sizeof(pathlen) +
1325 sizeof(remote_ino) + sizeof(remote_generation) +
1326 sizeof(remote_version) + sizeof(rec->offset) +
1327 sizeof(struct kml_suffix);
1329 if ( size > sizeof(record) )
1330 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1333 rec->size = size + size_round(le32_to_cpu(pathlen));
1335 logrecord = journal_log_prefix(record, opcode, rec);
1336 logrecord = log_version(logrecord, new_file_ver);
1337 logrecord = logit(logrecord, &ino, sizeof(ino));
1338 logrecord = logit(logrecord, &generation, sizeof(generation));
1339 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1340 logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino));
1341 logrecord = logit(logrecord, &remote_generation,
1342 sizeof(remote_generation));
1343 logrecord = log_version(logrecord, remote_version);
1344 logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset));
1345 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1347 error = presto_log(fset, rec, record, size,
1348 path, size_round(le32_to_cpu(pathlen)),
1358 * Check if the given record is at the end of the file. If it is, truncate
1359 * the lml to the record's offset, removing it. Repeat on prior record,
1360 * until we reach an active record or a reserved record (as defined by the
1361 * reservations list).
/*
 * Try to drop the last record of the LML.  Under fd_lock the function
 * releases the lock again when a truncation is already running, when
 * reservations are queued or when the file is empty; otherwise it sets
 * fd_truncating.  The last record is located with presto_last_record(), its
 * prefix is read back and the opcode is compared with KML_OPCODE_NOOP.  The
 * truncation itself is izo_do_truncate() down to
 * lml_last_rec - lml_last_recsize inside a KML_OPCODE_TRUNC transaction.
 * fd_truncating is cleared under the lock at the end.
 * NOTE(review): the return value of presto_last_record() is not used in the
 * lines shown -- confirm lml_last_recsize is always initialised.
 */
1363 static int presto_truncate_lml_tail(struct presto_file_set *fset)
1366 loff_t lml_last_rec;
1367 loff_t lml_last_recsize;
1368 loff_t local_offset;
1370 struct kml_prefix_hdr prefix;
1371 struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode;
1376 /* If someone else is already truncating the LML, return. */
1377 write_lock(&fset->fset_lml.fd_lock);
1378 if (fset->fset_lml.fd_truncating == 1 ) {
1379 write_unlock(&fset->fset_lml.fd_lock);
1383 /* someone is about to write to the end of the LML */
1384 if ( !list_empty(&fset->fset_lml.fd_reservations) ) {
1385 write_unlock(&fset->fset_lml.fd_lock);
1389 lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size;
1390 /* Nothing to truncate?*/
1391 if (lml_tail == 0) {
1392 write_unlock(&fset->fset_lml.fd_lock);
1396 fset->fset_lml.fd_truncating = 1;
1397 write_unlock(&fset->fset_lml.fd_lock);
1399 presto_last_record(&fset->fset_lml, &lml_last_recsize,
1400 &lml_last_rec, &recno, lml_tail);
1401 /* Do we have a record to check? If not we have zeroes at the
1402 beginning of the file. -SHP
1404 if (lml_last_recsize != 0) {
1405 local_offset = lml_last_rec - lml_last_recsize;
1406 rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix,
1407 sizeof(prefix), &local_offset);
1408 if (rc != sizeof(prefix)) {
1413 if ( prefix.opcode != KML_OPCODE_NOOP ) {
1416 /* We may have zeroes at the end of the file, should
1417 we clear them out? -SHP
1424 handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC);
1425 if ( IS_ERR(handle) ) {
1431 rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry,
1432 lml_last_rec - lml_last_recsize, lml_tail);
1433 presto_trans_commit(fset, handle);
1440 CDEBUG(D_JOURNAL, "rc = %d\n", rc);
1441 write_lock(&fset->fset_lml.fd_lock);
1442 fset->fset_lml.fd_truncating = 0;
1443 write_unlock(&fset->fset_lml.fd_lock);
/*
 * Call presto_truncate_lml_tail() repeatedly for as long as it returns a
 * positive value.  A negative result other than -EALREADY is reported with
 * CERROR.
 */
1447 int presto_truncate_lml(struct presto_file_set *fset)
1452 while ( (rc = presto_truncate_lml_tail(fset)) > 0);
1453 if ( rc < 0 && rc != -EALREADY) {
1454 CERROR("truncate_lml error %d\n", rc);
/*
 * Neutralise the LML record at lml_offset: read its kml_prefix_hdr, set the
 * opcode to KML_OPCODE_NOOP and write the header back to the same offset.
 * presto_no_journal() is checked first.
 * NOTE(review): the opcode is stored without cpu_to_le32 although the prefix
 * writer stores opcodes with cpu_to_le32 -- confirm.
 */
1460 int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
1463 struct kml_prefix_hdr record;
1464 loff_t offset = lml_offset;
1468 if ( presto_no_journal(fset) ) {
1473 CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %Zd\n",
1474 (long)lml_offset, sizeof(record));
1475 rc = presto_fread(fset->fset_lml.fd_file, (char *)&record,
1476 sizeof(record), &offset);
1478 if ( rc != sizeof(record) ) {
1479 CERROR("presto: clear_lml io error %d\n", rc);
1484 /* overwrite the prefix */
1485 CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
1486 record.opcode = KML_OPCODE_NOOP;
1487 offset = lml_offset;
1488 /* note: this does just a single transaction in the cache */
1489 rc = presto_fwrite(fset->fset_lml.fd_file, (char *)(&record),
1490 sizeof(record), &offset);
1491 if ( rc != sizeof(record) ) {
1502 /* now a journal function for every operation */
/*
 * Journal a setattr on `dentry` as a KML_OPCODE_SETATTR record.
 * presto_no_journal() is checked first, as are dentries with no inode, with
 * i_nlink == 0, or unhashed without being their own parent.  The record
 * holds the prefix, the old version, the valid mask, mode, uid, gid, size,
 * mtime, ctime, flags, the rollback data, the path length and the suffix;
 * attribute fields whose ATTR_* bit is not set in ia_valid are logged as 0.
 * The path is handed to presto_log() as the first string, padded with
 * size_round().
 * NOTE(review): iattr->ia_valid is modified in place (ATTR_MTIME is or-ed in
 * when ATTR_MTIME_SET is present), which the caller can observe -- confirm
 * that is intended.
 * NOTE(review): the size > sizeof(record) check only logs in the lines
 * shown -- confirm the function does not continue into an overflow of
 * record[316].
 */
1504 int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
1505 struct dentry *dentry, struct presto_version *old_ver,
1506 struct izo_rollback_data *rb, struct iattr *iattr)
1508 int opcode = KML_OPCODE_SETATTR;
1509 char *buffer, *path, *logrecord, record[316];
1510 struct dentry *root;
1511 __u32 uid, gid, mode, valid, flags, pathlen;
1512 __u64 fsize, mtime, ctime;
1516 if ( presto_no_journal(fset) ) {
1521 if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
1522 || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
1527 root = fset->fset_dentry;
1529 BUFF_ALLOC(buffer, NULL);
1530 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1531 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
1532 size = sizeof(__u32) * current->group_info->ngroups +
1533 sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) +
1534 sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) +
1535 sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) +
1536 sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix);
1538 if ( size > sizeof(record) )
1539 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1541 /* Only journal one kind of mtime, and not atime at all. Also don't
1542 * journal bogus data in iattr, to make the journal more compressible.
1544 if (iattr->ia_valid & ATTR_MTIME_SET)
1545 iattr->ia_valid = iattr->ia_valid | ATTR_MTIME;
1546 valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET |
1548 mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0;
1549 uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0;
1550 gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0;
1551 fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0;
1552 mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime.tv_sec): 0;
1553 ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime.tv_sec): 0;
1554 flags = iattr->ia_valid & ATTR_ATTR_FLAG ?
1555 cpu_to_le32(iattr->ia_attr_flags): 0;
1558 rec->size = size + size_round(le32_to_cpu(pathlen));
1560 logrecord = journal_log_prefix(record, opcode, rec);
1561 logrecord = log_version(logrecord, old_ver);
1562 logrecord = logit(logrecord, &valid, sizeof(valid));
1563 logrecord = logit(logrecord, &mode, sizeof(mode));
1564 logrecord = logit(logrecord, &uid, sizeof(uid));
1565 logrecord = logit(logrecord, &gid, sizeof(gid));
1566 logrecord = logit(logrecord, &fsize, sizeof(fsize));
1567 logrecord = logit(logrecord, &mtime, sizeof(mtime));
1568 logrecord = logit(logrecord, &ctime, sizeof(ctime));
1569 logrecord = logit(logrecord, &flags, sizeof(flags));
1570 logrecord = log_rollback(logrecord, rb);
1571 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1572 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
1574 error = presto_log(fset, rec, record, size,
1575 path, size_round(le32_to_cpu(pathlen)),
/*
 * presto_get_fileid: build a KML_OPCODE_GET_FILEID record for @dentry and
 * pass it to izo_upc_get_fileid() together with @minor.
 *
 * The record is assembled in the on-stack record[] buffer.  Here the path
 * is copied into record[] itself (and is also handed to the upcall as a
 * separate argument), so "size" already includes the rounded path length.
 */
1583 int presto_get_fileid(int minor, struct presto_file_set *fset,
1584 struct dentry *dentry)
1586 int opcode = KML_OPCODE_GET_FILEID;
1587 struct rec_info rec;
1588 char *buffer, *path, *logrecord, record[4096]; /*include path*/
1589 struct dentry *root;
1590 __u32 uid, gid, pathlen;
1592 struct kml_suffix *suffix;
1596 root = fset->fset_dentry;
/* Owner ids of the inode, stored in little-endian form. */
1598 uid = cpu_to_le32(dentry->d_inode->i_uid);
1599 gid = cpu_to_le32(dentry->d_inode->i_gid);
/* Path of dentry relative to the fileset root, built inside buffer. */
1600 BUFF_ALLOC(buffer, NULL);
1601 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1602 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/* group list + prefix header + path length field + padded path + suffix */
1603 size = sizeof(__u32) * current->group_info->ngroups +
1604 sizeof(struct kml_prefix_hdr) + sizeof(pathlen) +
1605 size_round(le32_to_cpu(pathlen)) +
1606 sizeof(struct kml_suffix);
1608 CDEBUG(D_FILE, "kml size: %d\n", size);
/*
 * NOTE(review): this check only logs.  Nothing visible here prevents the
 * logit() calls below from writing past record[] when size is too large
 * - confirm whether a bail-out is intended.
 */
1609 if ( size > sizeof(record) )
1610 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1612 memset(&rec, 0, sizeof(rec));
/* Record layout: prefix, path length, padded path, suffix. */
1616 logrecord = journal_log_prefix(record, opcode, &rec);
1617 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1618 logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen)));
/* Remember where the suffix starts before it is written. */
1619 suffix = (struct kml_suffix *)logrecord;
1620 logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec);
1621 /* journal_log_suffix expects journal_log to set this */
1624 CDEBUG(D_FILE, "actual kml size: %Zd\n", logrecord - record);
/*
 * NOTE(review): uid and gid were already converted with cpu_to_le32()
 * above, so on a big-endian host this message would show byte-swapped
 * values.
 */
1625 CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path);
1627 error = izo_upc_get_fileid(minor, size, record,
1628 size_round(le32_to_cpu(pathlen)), path,
/*
 * presto_journal_create: journal a KML_OPCODE_CREATE record for @dentry.
 *
 * The fixed-size part of the record is assembled in the on-stack record[]
 * buffer; the path is passed to presto_log() as a separate buffer.
 * rec->size is set to the fixed size plus the rounded path length.
 *
 * @tgt_dir_ver and @new_file_ver are written with log_version(); @mode is
 * stored in little-endian form.
 */
1636 int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
1637 struct dentry *dentry,
1638 struct presto_version *tgt_dir_ver,
1639 struct presto_version *new_file_ver, int mode)
1641 int opcode = KML_OPCODE_CREATE;
1642 char *buffer, *path, *logrecord, record[292];
1643 struct dentry *root;
1644 __u32 uid, gid, lmode, pathlen;
/* Journalling can be disabled per fileset. */
1648 if ( presto_no_journal(fset) ) {
1653 root = fset->fset_dentry;
/* Values that go into the record are converted to little-endian first. */
1655 uid = cpu_to_le32(dentry->d_inode->i_uid);
1656 gid = cpu_to_le32(dentry->d_inode->i_gid);
1657 lmode = cpu_to_le32(mode);
/* Path of dentry relative to the fileset root, built inside buffer. */
1659 BUFF_ALLOC(buffer, NULL);
1660 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1661 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * mode/uid/gid, path length field and suffix.  The path is not included.
 */
1662 size = sizeof(__u32) * current->group_info->ngroups +
1663 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1664 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1665 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.  size grows with the caller's group count.
 */
1667 if ( size > sizeof(record) )
1668 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1671 rec->size = size + size_round(le32_to_cpu(pathlen));
/* The order of the calls below defines the record layout. */
1673 logrecord = journal_log_prefix(record, opcode, rec);
1674 logrecord = log_version(logrecord, tgt_dir_ver);
1675 logrecord = log_dentry_version(logrecord, dentry->d_parent);
1676 logrecord = log_version(logrecord, new_file_ver);
1677 logrecord = logit(logrecord, &lmode, sizeof(lmode));
1678 logrecord = logit(logrecord, &uid, sizeof(uid));
1679 logrecord = logit(logrecord, &gid, sizeof(gid));
1680 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1681 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
/* Fixed part from record[], then the padded path as a separate buffer. */
1683 error = presto_log(fset, rec, record, size,
1684 path, size_round(le32_to_cpu(pathlen)),
/*
 * presto_journal_symlink: journal a KML_OPCODE_SYMLINK record for @dentry
 * with link target @target.
 *
 * The fixed-size part is assembled in record[]; the path and the target
 * string are passed to presto_log() as two separate buffers.  rec->size is
 * the fixed size plus the rounded lengths of both strings.
 */
1692 int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
1693 struct dentry *dentry, const char *target,
1694 struct presto_version *tgt_dir_ver,
1695 struct presto_version *new_link_ver)
1697 int opcode = KML_OPCODE_SYMLINK;
1698 char *buffer, *path, *logrecord, record[292];
1699 struct dentry *root;
1700 __u32 uid, gid, pathlen;
/* Length of the target string (terminator not counted), little-endian. */
1701 __u32 targetlen = cpu_to_le32(strlen(target));
/* Journalling can be disabled per fileset. */
1705 if ( presto_no_journal(fset) ) {
1710 root = fset->fset_dentry;
1712 uid = cpu_to_le32(dentry->d_inode->i_uid);
1713 gid = cpu_to_le32(dentry->d_inode->i_gid);
/* Path of dentry relative to the fileset root, built inside buffer. */
1715 BUFF_ALLOC(buffer, NULL);
1716 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1717 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * uid/gid, the two length fields and the suffix.
 */
1718 size = sizeof(__u32) * current->group_info->ngroups +
1719 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1720 sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1721 sizeof(targetlen) + sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
1723 if ( size > sizeof(record) )
1724 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1727 rec->size = size + size_round(le32_to_cpu(pathlen)) +
1728 size_round(le32_to_cpu(targetlen));
/* The order of the calls below defines the record layout. */
1730 logrecord = journal_log_prefix(record, opcode, rec);
1731 logrecord = log_version(logrecord, tgt_dir_ver);
1732 logrecord = log_dentry_version(logrecord, dentry->d_parent);
1733 logrecord = log_version(logrecord, new_link_ver);
1734 logrecord = logit(logrecord, &uid, sizeof(uid));
1735 logrecord = logit(logrecord, &gid, sizeof(gid));
1736 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1737 logrecord = logit(logrecord, &targetlen, sizeof(targetlen));
1738 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
/* Fixed part, then the padded path, then the padded target. */
1740 error = presto_log(fset, rec, record, size,
1741 path, size_round(le32_to_cpu(pathlen)),
1742 target, size_round(le32_to_cpu(targetlen)),
/*
 * presto_journal_mkdir: journal a KML_OPCODE_MKDIR record for @dentry.
 *
 * The fixed-size part of the record is assembled in the on-stack record[]
 * buffer; the path is passed to presto_log() as a separate buffer.
 * rec->size is set to the fixed size plus the rounded path length.
 */
1750 int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
1751 struct dentry *dentry,
1752 struct presto_version *tgt_dir_ver,
1753 struct presto_version *new_dir_ver, int mode)
1755 int opcode = KML_OPCODE_MKDIR;
1756 char *buffer, *path, *logrecord, record[292];
1757 struct dentry *root;
1758 __u32 uid, gid, lmode, pathlen;
/* Journalling can be disabled per fileset. */
1762 if ( presto_no_journal(fset) ) {
1767 root = fset->fset_dentry;
/* Values that go into the record are converted to little-endian first. */
1769 uid = cpu_to_le32(dentry->d_inode->i_uid);
1770 gid = cpu_to_le32(dentry->d_inode->i_gid);
1771 lmode = cpu_to_le32(mode);
/* Path of dentry relative to the fileset root, built inside buffer. */
1773 BUFF_ALLOC(buffer, NULL);
1774 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1775 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * mode/uid/gid, path length field and suffix.
 */
1776 size = sizeof(__u32) * current->group_info->ngroups +
1777 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1778 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
1779 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
1781 if ( size > sizeof(record) )
1782 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1785 rec->size = size + size_round(le32_to_cpu(pathlen));
/* The order of the calls below defines the record layout. */
1786 logrecord = journal_log_prefix(record, opcode, rec);
1788 logrecord = log_version(logrecord, tgt_dir_ver);
1789 logrecord = log_dentry_version(logrecord, dentry->d_parent);
1790 logrecord = log_version(logrecord, new_dir_ver);
1791 logrecord = logit(logrecord, &lmode, sizeof(lmode));
1792 logrecord = logit(logrecord, &uid, sizeof(uid));
1793 logrecord = logit(logrecord, &gid, sizeof(gid));
1794 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1795 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
/* Fixed part from record[], then the padded path as a separate buffer. */
1797 error = presto_log(fset, rec, record, size,
1798 path, size_round(le32_to_cpu(pathlen)),
/*
 * presto_journal_rmdir: journal a KML_OPCODE_RMDIR record for the entry
 * @name (of length @len) in directory @dir.
 *
 * The path logged is that of @dir; the removed entry's name is passed to
 * presto_log() as a separate buffer after the path.  The rollback data
 * @rb is copied into the record verbatim with logit().
 */
1808 presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
1809 struct dentry *dir, struct presto_version *tgt_dir_ver,
1810 struct presto_version *old_dir_ver,
1811 struct izo_rollback_data *rb, int len, const char *name)
1813 int opcode = KML_OPCODE_RMDIR;
1814 char *buffer, *path, *logrecord, record[316];
1815 __u32 pathlen, llen;
1816 struct dentry *root;
/* Journalling can be disabled per fileset. */
1820 if ( presto_no_journal(fset) ) {
1825 root = fset->fset_dentry;
/* Name length in little-endian form for the record. */
1827 llen = cpu_to_le32(len);
/* Path of the parent directory relative to the fileset root. */
1828 BUFF_ALLOC(buffer, NULL);
1829 path = presto_path(dir, root, buffer, PAGE_SIZE);
1830 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * the two length fields, the rollback data and the suffix.
 */
1831 size = sizeof(__u32) * current->group_info->ngroups +
1832 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1833 sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
1834 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
1836 if ( size > sizeof(record) )
1837 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
/*
 * NOTE(review): pathlen is printed here in its little-endian form, while
 * len is printed in host order.
 */
1839 CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n",
1840 path, pathlen, name, len, size);
1843 rec->size = size + size_round(le32_to_cpu(pathlen)) +
/* The order of the calls below defines the record layout. */
1846 logrecord = journal_log_prefix(record, opcode, rec);
1847 logrecord = log_version(logrecord, tgt_dir_ver);
1848 logrecord = log_dentry_version(logrecord, dir);
1849 logrecord = log_version(logrecord, old_dir_ver);
1850 logrecord = logit(logrecord, rb, sizeof(*rb));
1851 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1852 logrecord = logit(logrecord, &llen, sizeof(llen));
1853 logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
/* Fixed part, then the padded directory path, then the padded name. */
1854 error = presto_log(fset, rec, record, size,
1855 path, size_round(le32_to_cpu(pathlen)),
1856 name, size_round(len),
/*
 * presto_journal_mknod: journal a KML_OPCODE_MKNOD record for @dentry.
 *
 * In addition to mode, uid and gid, the device numbers @dmajor and @dminor
 * are stored in the record, each as a little-endian 32-bit value.  The
 * path is passed to presto_log() as a separate buffer.
 */
1866 presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
1867 struct dentry *dentry, struct presto_version *tgt_dir_ver,
1868 struct presto_version *new_node_ver, int mode,
1869 int dmajor, int dminor )
1871 int opcode = KML_OPCODE_MKNOD;
1872 char *buffer, *path, *logrecord, record[292];
1873 struct dentry *root;
1874 __u32 uid, gid, lmode, lmajor, lminor, pathlen;
/* Journalling can be disabled per fileset. */
1878 if ( presto_no_journal(fset) ) {
1883 root = fset->fset_dentry;
/* Values that go into the record are converted to little-endian first. */
1885 uid = cpu_to_le32(dentry->d_inode->i_uid);
1886 gid = cpu_to_le32(dentry->d_inode->i_gid);
1887 lmode = cpu_to_le32(mode);
1888 lmajor = cpu_to_le32(dmajor);
1889 lminor = cpu_to_le32(dminor);
/* Path of dentry relative to the fileset root, built inside buffer. */
1891 BUFF_ALLOC(buffer, NULL);
1892 path = presto_path(dentry, root, buffer, PAGE_SIZE);
1893 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * mode/uid/gid, major/minor, path length field and suffix.
 */
1894 size = sizeof(__u32) * current->group_info->ngroups +
1895 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1896 sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) +
1897 sizeof(lminor) + sizeof(pathlen) +
1898 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
1900 if ( size > sizeof(record) )
1901 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1904 rec->size = size + size_round(le32_to_cpu(pathlen));
/* The order of the calls below defines the record layout. */
1906 logrecord = journal_log_prefix(record, opcode, rec);
1907 logrecord = log_version(logrecord, tgt_dir_ver);
1908 logrecord = log_dentry_version(logrecord, dentry->d_parent);
1909 logrecord = log_version(logrecord, new_node_ver);
1910 logrecord = logit(logrecord, &lmode, sizeof(lmode));
1911 logrecord = logit(logrecord, &uid, sizeof(uid));
1912 logrecord = logit(logrecord, &gid, sizeof(gid));
1913 logrecord = logit(logrecord, &lmajor, sizeof(lmajor));
1914 logrecord = logit(logrecord, &lminor, sizeof(lminor));
1915 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1916 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
/* Fixed part from record[], then the padded path as a separate buffer. */
1918 error = presto_log(fset, rec, record, size,
1919 path, size_round(le32_to_cpu(pathlen)),
/*
 * presto_journal_link: journal a KML_OPCODE_LINK record for a new link
 * @tgt to the existing object @src.
 *
 * Two path buffers are used, one for each dentry.  Both paths are passed
 * to presto_log() as separate buffers, source path first.  The version
 * entries and the suffix are taken from @tgt and its parent.
 */
1928 presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
1929 struct dentry *src, struct dentry *tgt,
1930 struct presto_version *tgt_dir_ver,
1931 struct presto_version *new_link_ver)
1933 int opcode = KML_OPCODE_LINK;
1934 char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
1935 __u32 pathlen, srcpathlen;
1936 struct dentry *root;
/* Journalling can be disabled per fileset. */
1940 if ( presto_no_journal(fset) ) {
1945 root = fset->fset_dentry;
/* Path of the existing object. */
1947 BUFF_ALLOC(srcbuffer, NULL);
1948 srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
1949 srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
/*
 * Path of the new link.  srcbuffer is handed to BUFF_ALLOC as its second
 * argument - presumably so it can be released if this allocation fails;
 * verify against the macro definition.
 */
1951 BUFF_ALLOC(buffer, srcbuffer);
1952 path = presto_path(tgt, root, buffer, PAGE_SIZE);
1953 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * the two path length fields and the suffix.
 */
1954 size = sizeof(__u32) * current->group_info->ngroups +
1955 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
1956 sizeof(srcpathlen) + sizeof(pathlen) +
1957 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
1959 if ( size > sizeof(record) )
1960 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
1963 rec->size = size + size_round(le32_to_cpu(pathlen)) +
1964 size_round(le32_to_cpu(srcpathlen));
/* The order of the calls below defines the record layout. */
1966 logrecord = journal_log_prefix(record, opcode, rec);
1967 logrecord = log_version(logrecord, tgt_dir_ver);
1968 logrecord = log_dentry_version(logrecord, tgt->d_parent);
1969 logrecord = log_version(logrecord, new_link_ver);
1970 logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
1971 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
1972 logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
/* Fixed part, then the padded source path, then the padded link path. */
1974 error = presto_log(fset, rec, record, size,
1975 srcpath, size_round(le32_to_cpu(srcpathlen)),
1976 path, size_round(le32_to_cpu(pathlen)),
1979 BUFF_FREE(srcbuffer);
/*
 * presto_journal_rename: journal a KML_OPCODE_RENAME record for moving
 * @src to @tgt.
 *
 * Four version entries are logged: @src_dir_ver, the entry for
 * src->d_parent, @tgt_dir_ver and the entry for tgt->d_parent.  Both
 * paths are passed to presto_log() as separate buffers, source first.
 */
1986 int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
1987 struct dentry *src, struct dentry *tgt,
1988 struct presto_version *src_dir_ver,
1989 struct presto_version *tgt_dir_ver)
1991 int opcode = KML_OPCODE_RENAME;
1992 char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
1993 __u32 pathlen, srcpathlen;
1994 struct dentry *root;
/* Journalling can be disabled per fileset. */
1998 if ( presto_no_journal(fset) ) {
2003 root = fset->fset_dentry;
/* Path of the source dentry. */
2005 BUFF_ALLOC(srcbuffer, NULL);
2006 srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
2007 srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
/*
 * Path of the target dentry.  srcbuffer is handed to BUFF_ALLOC as its
 * second argument - presumably so it can be released if this allocation
 * fails; verify against the macro definition.
 */
2009 BUFF_ALLOC(buffer, srcbuffer);
2010 path = presto_path(tgt, root, buffer, PAGE_SIZE);
2011 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, four version entries,
 * the two path length fields and the suffix.
 */
2012 size = sizeof(__u32) * current->group_info->ngroups +
2013 sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) +
2014 sizeof(srcpathlen) + sizeof(pathlen) +
2015 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
2017 if ( size > sizeof(record) )
2018 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2021 rec->size = size + size_round(le32_to_cpu(pathlen)) +
2022 size_round(le32_to_cpu(srcpathlen));
/* The order of the calls below defines the record layout. */
2024 logrecord = journal_log_prefix(record, opcode, rec);
2025 logrecord = log_version(logrecord, src_dir_ver);
2026 logrecord = log_dentry_version(logrecord, src->d_parent);
2027 logrecord = log_version(logrecord, tgt_dir_ver);
2028 logrecord = log_dentry_version(logrecord, tgt->d_parent);
2029 logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
2030 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2031 logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
/* Fixed part, then the padded source path, then the padded target path. */
2033 error = presto_log(fset, rec, record, size,
2034 srcpath, size_round(le32_to_cpu(srcpathlen)),
2035 path, size_round(le32_to_cpu(pathlen)),
2039 BUFF_FREE(srcbuffer);
/*
 * presto_journal_unlink: journal a KML_OPCODE_UNLINK record for removing
 * @dentry from directory @dir.
 *
 * The path logged is that of @dir; the entry name comes from
 * dentry->d_name.  @old_target / @old_targetlen are passed to presto_log()
 * as a third buffer after the path and the name.
 */
2044 int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
2045 struct dentry *dir, struct presto_version *tgt_dir_ver,
2046 struct presto_version *old_file_ver,
2047 struct izo_rollback_data *rb, struct dentry *dentry,
2048 char *old_target, int old_targetlen)
2050 int opcode = KML_OPCODE_UNLINK;
2051 char *buffer, *path, *logrecord, record[316];
2053 __u32 pathlen, llen;
2054 struct dentry *root;
2055 int error, size, len;
/* Journalling can be disabled per fileset. */
2058 if ( presto_no_journal(fset) ) {
2063 root = fset->fset_dentry;
/* Name and name length of the entry being removed. */
2065 name = dentry->d_name.name;
2066 len = dentry->d_name.len;
2068 llen = cpu_to_le32(len);
/* Path of the parent directory relative to the fileset root. */
2069 BUFF_ALLOC(buffer, NULL);
2070 path = presto_path(dir, root, buffer, PAGE_SIZE);
2071 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
/*
 * Fixed part only: group list, prefix header, three version entries,
 * path/name length fields, rollback data, old target length and suffix.
 */
2072 size = sizeof(__u32) * current->group_info->ngroups +
2073 sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
2074 sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
2075 sizeof(old_targetlen) + sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
2077 if ( size > sizeof(record) )
2078 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2081 rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) +
2082 size_round(old_targetlen);
/* The order of the calls below defines the record layout. */
2084 logrecord = journal_log_prefix(record, opcode, rec);
2085 logrecord = log_version(logrecord, tgt_dir_ver);
2086 logrecord = log_dentry_version(logrecord, dir);
2087 logrecord = log_version(logrecord, old_file_ver);
2088 logrecord = log_rollback(logrecord, rb);
2089 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2090 logrecord = logit(logrecord, &llen, sizeof(llen));
/*
 * NOTE(review): old_targetlen is a host-order int and is logged without
 * a visible cpu_to_le32(), unlike pathlen and llen above - confirm that
 * this is intended.
 */
2091 logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen));
2092 logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
/* Fixed part, then padded directory path, name and old target. */
2094 error = presto_log(fset, rec, record, size,
2095 path, size_round(le32_to_cpu(pathlen)),
2096 name, size_round(len),
2097 old_target, size_round(old_targetlen));
/*
 * presto_journal_close: journal a KML_OPCODE_CLOSE record for @dentry.
 *
 * The prefix is written with an explicit group list and fsuid/fsgid
 * (journal_log_prefix_with_groups_and_ids) instead of the plain
 * journal_log_prefix() used by the other journalling routines.  Two sets
 * of assignments to the open_* variables are visible below: one copies
 * them from @fd, the other takes them from current and the inode.  The
 * condition choosing between them is not visible here (presumably whether
 * @fd is non-NULL - verify).
 *
 * Besides the two versions, the inode number and generation are logged.
 */
2105 presto_journal_close(struct rec_info *rec, struct presto_file_set *fset,
2106 struct presto_file_data *fd, struct dentry *dentry,
2107 struct presto_version *old_file_ver,
2108 struct presto_version *new_file_ver)
2110 int opcode = KML_OPCODE_CLOSE;
2111 char *buffer, *path, *logrecord, record[316];
2112 struct dentry *root;
2114 __u32 pathlen, generation;
/* Fixed-size staging array for the group list written into the prefix. */
2119 __u32 open_groups[NGROUPS_SMALL];
/* Journalling can be disabled per fileset. */
2126 if ( presto_no_journal(fset) ) {
/*
 * Skip dentries that have no inode, whose inode has no links left, or
 * that are unhashed (unless the dentry is its own parent).
 */
2131 if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
2132 || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
2137 root = fset->fset_dentry;
/*
 * Credentials recorded in the file data.
 * NOTE(review): the copy loop is bounded only by fd->fd_ngroups; no clamp
 * to NGROUPS_SMALL (the size of open_groups[]) is visible.
 */
2140 open_ngroups = fd->fd_ngroups;
2141 for (i = 0; i < fd->fd_ngroups; i++)
2142 open_groups[i] = (__u32) fd->fd_groups[i];
2143 open_mode = fd->fd_mode;
2144 open_uid = fd->fd_uid;
2145 open_gid = fd->fd_gid;
2146 open_fsuid = fd->fd_fsuid;
2147 open_fsgid = fd->fd_fsgid;
/*
 * Credentials of the current task, with mode/uid/gid from the inode.
 * NOTE(review): likewise bounded only by current->group_info->ngroups,
 * with no visible clamp to NGROUPS_SMALL.
 */
2149 open_ngroups = current->group_info->ngroups;
2150 for (i=0; i<current->group_info->ngroups; i++)
2151 open_groups[i] = (__u32) GROUP_AT(current->group_info,i);
2152 open_mode = dentry->d_inode->i_mode;
2153 open_uid = dentry->d_inode->i_uid;
2154 open_gid = dentry->d_inode->i_gid;
2155 open_fsuid = current->fsuid;
2156 open_fsgid = current->fsgid;
/* Path of dentry relative to the fileset root, built inside buffer. */
2158 BUFF_ALLOC(buffer, NULL);
2159 path = presto_path(dentry, root, buffer, PAGE_SIZE);
2160 pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
2161 ino = cpu_to_le64(dentry->d_inode->i_ino);
2162 generation = cpu_to_le32(dentry->d_inode->i_generation);
/*
 * Fixed part only: group list, open mode/uid/gid, prefix header, two
 * versions, inode number, generation, path length field and suffix.
 */
2163 size = sizeof(__u32) * open_ngroups +
2164 sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) +
2165 sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) +
2166 sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) +
2167 sizeof(pathlen) + sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.
 */
2169 if ( size > sizeof(record) )
2170 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2173 rec->size = size + size_round(le32_to_cpu(pathlen));
/* The order of the calls below defines the record layout. */
2175 logrecord = journal_log_prefix_with_groups_and_ids(
2176 record, opcode, rec, open_ngroups, open_groups,
2177 open_fsuid, open_fsgid);
/*
 * NOTE(review): open_mode/open_uid/open_gid are logged as assigned above;
 * no cpu_to_le32() conversion is visible for them in this function.
 */
2178 logrecord = logit(logrecord, &open_mode, sizeof(open_mode));
2179 logrecord = logit(logrecord, &open_uid, sizeof(open_uid));
2180 logrecord = logit(logrecord, &open_gid, sizeof(open_gid));
2181 logrecord = log_version(logrecord, old_file_ver);
2182 logrecord = log_version(logrecord, new_file_ver);
2183 logrecord = logit(logrecord, &ino, sizeof(ino));
2184 logrecord = logit(logrecord, &generation, sizeof(generation));
2185 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2186 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
/* Fixed part from record[], then the padded path as a separate buffer. */
2188 error = presto_log(fset, rec, record, size,
2189 path, size_round(le32_to_cpu(pathlen)),
/*
 * presto_rewrite_close: journal a KML_OPCODE_CLOSE record from values
 * supplied by the caller rather than from a dentry.
 *
 * @path / @pathlen: path to log; @pathlen is treated as little-endian
 *                   (le32_to_cpu() is applied wherever its value is
 *                   needed) and is written into the record as passed.
 * @ngroups / @groups: group list for the record prefix.
 * @ino / @generation: written into the record as passed; no conversion is
 *                     done here.
 * @new_file_ver: version written with log_version().
 */
2197 int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset,
2198 char *path, __u32 pathlen,
2199 int ngroups, __u32 *groups,
2200 __u64 ino, __u32 generation,
2201 struct presto_version *new_file_ver)
2203 int opcode = KML_OPCODE_CLOSE;
2204 char *logrecord, record[292];
2205 struct dentry *root;
/* Journalling can be disabled per fileset. */
2210 if ( presto_no_journal(fset) ) {
/* NOTE(review): root is assigned but not used in the visible code. */
2215 root = fset->fset_dentry;
/*
 * Fixed part only.  sizeof(le32_to_cpu(pathlen)) is just the size of the
 * 32-bit length field; sizeof does not evaluate its operand.
 */
2217 size = sizeof(__u32) * ngroups +
2218 sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
2219 sizeof(ino) + sizeof(generation) +
2220 sizeof(le32_to_cpu(pathlen)) +
2221 sizeof(struct kml_suffix);
/*
 * NOTE(review): this check only logs; no bail-out is visible before
 * record[] is filled below.  ngroups is supplied by the caller.
 */
2223 if ( size > sizeof(record) )
2224 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2227 rec->size = size + size_round(le32_to_cpu(pathlen));
/* The order of the calls below defines the record layout. */
2229 logrecord = journal_log_prefix_with_groups(record, opcode, rec,
2231 logrecord = log_version(logrecord, new_file_ver);
2232 logrecord = logit(logrecord, &ino, sizeof(ino));
2233 logrecord = logit(logrecord, &generation, sizeof(generation));
2234 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
/* No dentry is available here, so NULL is passed for it. */
2235 logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec);
/* Fixed part from record[], then the padded path as a separate buffer. */
2237 error = presto_log(fset, rec, record, size,
2238 path, size_round(le32_to_cpu(pathlen)),
/*
 * NOTE(review): summary of the visible flow for one LML record - read a
 * kml_prefix_hdr at lml_offset, skip KML_OPCODE_NOOP records, read the
 * group list and the close record, skip records whose remote_ino is
 * non-zero, read the path, then inside a KML_OPCODE_RELEASE transaction
 * clear the LML close record and re-journal it with
 * presto_rewrite_close().  lml_offset advances by prefix.len per record.
 * The enclosing loop construct is not visible in this listing.
 */
2246 /* write closes for the local close records in the LML */
2247 int presto_complete_lml(struct presto_file_set *fset)
/* Staging array for the group list stored after each record prefix. */
2249 __u32 groups[NGROUPS_SMALL];
2254 struct rec_info rec;
2256 struct presto_version new_file_ver;
2261 __u32 remote_generation;
2262 __u32 remote_version;
2265 struct file *file = fset->fset_lml.fd_file;
2266 struct kml_prefix_hdr prefix;
/* Stop condition: offset has reached the end of the LML file. */
2272 if (lml_offset >= file->f_dentry->d_inode->i_size) {
/* Read the fixed prefix header of the record at lml_offset. */
2277 read_offset = lml_offset;
2278 rc = presto_fread(file, (char *)&prefix,
2279 sizeof(prefix), &read_offset);
2280 if ( rc != sizeof(prefix) ) {
2282 CERROR("presto_complete_lml: ioerror - 1, tell Peter\n");
/*
 * NOOP records are skipped.
 * NOTE(review): prefix.opcode, prefix.len and prefix.ngroups are used as
 * read from the file, without the le*_to_cpu() conversion applied to the
 * close_rec fields below - confirm the on-disk byte order of the prefix.
 */
2286 if ( prefix.opcode == KML_OPCODE_NOOP ) {
2287 lml_offset += prefix.len;
/*
 * NOTE(review): prefix.ngroups comes from the file; no check against
 * NGROUPS_SMALL is visible before reading into groups[].
 */
2291 rc = presto_fread(file, (char *)groups,
2292 prefix.ngroups * sizeof(__u32), &read_offset);
2293 if ( rc != prefix.ngroups * sizeof(__u32) ) {
2295 CERROR("presto_complete_lml: ioerror - 2, tell Peter\n");
2299 rc = presto_fread(file, (char *)&close_rec,
2300 sizeof(close_rec), &read_offset);
2301 if ( rc != sizeof(close_rec) ) {
2303 CERROR("presto_complete_lml: ioerror - 3, tell Peter\n");
2307 /* is this a backfetch or a close record? */
2308 if ( le64_to_cpu(close_rec.remote_ino) != 0 ) {
2309 lml_offset += prefix.len;
/*
 * Read the path that follows the close record.
 * NOTE(review): close_rec.pathlen also comes from the file; no bound
 * against the size of buffer is visible before the read.
 */
2313 BUFF_ALLOC(buffer, NULL);
2314 rc = presto_fread(file, (char *)buffer,
2315 le32_to_cpu(close_rec.pathlen), &read_offset);
2316 if ( rc != le32_to_cpu(close_rec.pathlen) ) {
2318 CERROR("presto_complete_lml: ioerror - 4, tell Peter\n");
/* Clearing the LML entry and rewriting the close share one transaction. */
2322 handle = presto_trans_start(fset, file->f_dentry->d_inode,
2323 KML_OPCODE_RELEASE);
2324 if ( IS_ERR(handle) ) {
2329 rc = presto_clear_lml_close(fset, lml_offset);
2331 CERROR("error during clearing: %d\n", rc);
2332 presto_trans_commit(fset, handle);
/*
 * pathlen, ino and generation are passed on in the form read from the
 * file; presto_rewrite_close() applies le32_to_cpu() to pathlen itself.
 */
2337 rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen,
2338 prefix.ngroups, groups,
2339 close_rec.ino, close_rec.generation,
2340 &close_rec.new_file_ver);
2342 CERROR("error during rewrite close: %d\n", rc);
2343 presto_trans_commit(fset, handle);
2348 presto_trans_commit(fset, handle);
2350 CERROR("error during truncation: %d\n", rc);
/* Advance to the next record. */
2355 lml_offset += prefix.len;
2356 CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset);
2364 #ifdef CONFIG_FS_EXT_ATTR
2365 /* Journal an ea operation. A NULL buffer implies the attribute is
2366 * getting deleted. In this case we simply change the opcode, but nothing
2369 int presto_journal_set_ext_attr (struct rec_info *rec,
2370 struct presto_file_set *fset,
2371 struct dentry *dentry,
2372 struct presto_version *ver, const char *name,
2373 const char *buffer, int buffer_len,
2376 int opcode = (buffer == NULL) ?
2377 KML_OPCODE_DELEXTATTR :
2378 KML_OPCODE_SETEXTATTR ;
2379 char *temp, *path, *logrecord, record[292];
2380 struct dentry *root;
2382 __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX));
2383 __u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0);
2384 __u32 mode, pathlen;
2387 if ( presto_no_journal(fset) ) {
2392 if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
2393 || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
2398 root = fset->fset_dentry;
2400 BUFF_ALLOC(temp, NULL);
2401 path = presto_path(dentry, root, temp, PAGE_SIZE);
2402 pathlen = cpu_to_le32(MYPATHLEN(temp, path));
2404 flags=cpu_to_le32(flags);
2405 /* Ugly, but needed. posix ACLs change the mode without using
2406 * setattr, we need to record these changes. The EA code per se
2407 * is not really affected.
2409 mode=cpu_to_le32(dentry->d_inode->i_mode);
2411 size = sizeof(__u32) * current->group_info->ngroups +
2412 sizeof(struct kml_prefix_hdr) +
2413 2 * sizeof(struct presto_version) +
2414 sizeof(flags) + sizeof(mode) + sizeof(namelen) +
2415 sizeof(buflen) + sizeof(pathlen) +
2416 sizeof(struct kml_suffix);
2418 if ( size > sizeof(record) )
2419 CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
2422 /* Make space for a path, a attr name and value*/
2423 /* We use the buflen instead of buffer_len to make sure that we
2424 * journal the right length. This may be a little paranoid, but
2425 * with 64 bits round the corner, I would rather be safe than sorry!
2426 * Also this handles deletes with non-zero buffer_lengths correctly.
2429 rec->size = size + size_round(le32_to_cpu(pathlen)) +
2430 size_round(le32_to_cpu(namelen)) +
2431 size_round(le32_to_cpu(buflen));
2433 logrecord = journal_log_prefix(record, opcode, rec);
2434 logrecord = log_version(logrecord, ver);
2435 logrecord = log_dentry_version(logrecord, dentry);
2436 logrecord = logit(logrecord, &flags, sizeof(flags));
2437 logrecord = logit(logrecord, &mode, sizeof(flags));
2438 logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
2439 logrecord = logit(logrecord, &namelen, sizeof(namelen));
2440 logrecord = logit(logrecord, &buflen, sizeof(buflen));
2441 logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
2443 error = presto_log(fset, rec, record, size,
2444 path, size_round(le32_to_cpu(pathlen)),
2445 name, size_round(le32_to_cpu(namelen)),
2446 buffer, size_round(le32_to_cpu(buflen)));