linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / include / linux / relayfs_fs.h
index 2c52874..7342e66 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com)
  *
  * RelayFS definitions and declarations
- *
- * Please see Documentation/filesystems/relayfs.txt for more info.
  */
 
 #ifndef _LINUX_RELAYFS_FS_H
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/kref.h>
 
 /*
- * Tracks changes to rchan struct
- */
-#define RELAYFS_CHANNEL_VERSION                1
-
-/*
- * Maximum number of simultaneously open channels
- */
-#define RELAY_MAX_CHANNELS             256
-
-/*
- * Relay properties
- */
-#define RELAY_MIN_BUFS                 2
-#define RELAY_MIN_BUFSIZE              4096
-#define RELAY_MAX_BUFS                 256
-#define RELAY_MAX_BUF_SIZE             0x1000000
-#define RELAY_MAX_TOTAL_BUF_SIZE       0x8000000
-
-/*
- * Lockless scheme utility macros
- */
-#define RELAY_MAX_BUFNO(bufno_bits) (1UL << (bufno_bits))
-#define RELAY_BUF_SIZE(offset_bits) (1UL << (offset_bits))
-#define RELAY_BUF_OFFSET_MASK(offset_bits) (RELAY_BUF_SIZE(offset_bits) - 1)
-#define RELAY_BUFNO_GET(index, offset_bits) ((index) >> (offset_bits))
-#define RELAY_BUF_OFFSET_GET(index, mask) ((index) & (mask))
-#define RELAY_BUF_OFFSET_CLEAR(index, mask) ((index) & ~(mask))
-
-/*
- * Flags returned by relay_reserve()
- */
-#define RELAY_BUFFER_SWITCH_NONE       0x0
-#define RELAY_WRITE_DISCARD_NONE       0x0
-#define RELAY_BUFFER_SWITCH            0x1
-#define RELAY_WRITE_DISCARD            0x2
-#define RELAY_WRITE_TOO_LONG           0x4
-
-/*
- * Relay attribute flags
- */
-#define RELAY_DELIVERY_BULK            0x1
-#define RELAY_DELIVERY_PACKET          0x2
-#define RELAY_SCHEME_LOCKLESS          0x4
-#define RELAY_SCHEME_LOCKING           0x8
-#define RELAY_SCHEME_ANY               0xC
-#define RELAY_TIMESTAMP_TSC            0x10
-#define RELAY_TIMESTAMP_GETTIMEOFDAY   0x20
-#define RELAY_TIMESTAMP_ANY            0x30
-#define RELAY_USAGE_SMP                        0x40
-#define RELAY_USAGE_GLOBAL             0x80
-#define RELAY_MODE_CONTINUOUS          0x100
-#define RELAY_MODE_NO_OVERWRITE                0x200
-
-/*
- * Flags for needs_resize() callback
+ * Tracks changes to rchan/rchan_buf structs
  */
-#define RELAY_RESIZE_NONE      0x0
-#define RELAY_RESIZE_EXPAND    0x1
-#define RELAY_RESIZE_SHRINK    0x2
-#define RELAY_RESIZE_REPLACE   0x4
-#define RELAY_RESIZE_REPLACED  0x8
+#define RELAYFS_CHANNEL_VERSION                6
 
 /*
- * Values for fileop_notify() callback
+ * Per-cpu relay channel buffer
  */
-enum relay_fileop
+struct rchan_buf
 {
-       RELAY_FILE_OPEN,
-       RELAY_FILE_CLOSE,
-       RELAY_FILE_MAP,
-       RELAY_FILE_UNMAP
-};
+       void *start;                    /* start of channel buffer */
+       void *data;                     /* start of current sub-buffer */
+       size_t offset;                  /* current offset into sub-buffer */
+       size_t subbufs_produced;        /* count of sub-buffers produced */
+       size_t subbufs_consumed;        /* count of sub-buffers consumed */
+       struct rchan *chan;             /* associated channel */
+       wait_queue_head_t read_wait;    /* reader wait queue */
+       struct work_struct wake_readers; /* reader wake-up work struct */
+       struct dentry *dentry;          /* channel file dentry */
+       struct kref kref;               /* channel buffer refcount */
+       struct page **page_array;       /* array of current buffer pages */
+       unsigned int page_count;        /* number of current buffer pages */
+       unsigned int finalized;         /* buffer has been finalized */
+       size_t *padding;                /* padding counts per sub-buffer */
+       size_t prev_padding;            /* temporary variable */
+       size_t bytes_consumed;          /* bytes consumed in cur read subbuf */
+       unsigned int cpu;               /* this buf's cpu */
+} ____cacheline_aligned;
 
 /*
- * Data structure returned by relay_info()
+ * Relay channel data structure
  */
-struct rchan_info
+struct rchan
 {
-       u32 flags;              /* relay attribute flags for channel */
-       u32 buf_size;           /* channel's sub-buffer size */
-       char *buf_addr;         /* address of channel start */
-       u32 alloc_size;         /* total buffer size actually allocated */
-       u32 n_bufs;             /* number of sub-buffers in channel */
-       u32 cur_idx;            /* current write index into channel */
-       u32 bufs_produced;      /* current count of sub-buffers produced */
-       u32 bufs_consumed;      /* current count of sub-buffers consumed */
-       u32 buf_id;             /* buf_id of current sub-buffer */
-       int buffer_complete[RELAY_MAX_BUFS];    /* boolean per sub-buffer */
-       int unused_bytes[RELAY_MAX_BUFS];       /* count per sub-buffer */
+       u32 version;                    /* the version of this struct */
+       size_t subbuf_size;             /* sub-buffer size */
+       size_t n_subbufs;               /* number of sub-buffers per buffer */
+       size_t alloc_size;              /* total buffer size allocated */
+       struct rchan_callbacks *cb;     /* client callbacks */
+       struct kref kref;               /* channel refcount */
+       void *private_data;             /* for user-defined data */
+       size_t last_toobig;             /* tried to log event > subbuf size */
+       struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
 };
 
 /*
@@ -117,570 +70,218 @@ struct rchan_info
 struct rchan_callbacks
 {
        /*
-        * buffer_start - called at the beginning of a new sub-buffer
-        * @rchan_id: the channel id
-        * @current_write_pos: position in sub-buffer client should write to
-        * @buffer_id: the id of the new sub-buffer
-        * @start_time: the timestamp associated with the start of sub-buffer
-        * @start_tsc: the TSC associated with the timestamp, if using_tsc
-        * @using_tsc: boolean, indicates whether start_tsc is valid
+        * subbuf_start - called on buffer-switch to a new sub-buffer
+        * @buf: the channel buffer containing the new sub-buffer
+        * @subbuf: the start of the new sub-buffer
+        * @prev_subbuf: the start of the previous sub-buffer
+        * @prev_padding: unused space at the end of previous sub-buffer
         *
-        * Return value should be the number of bytes written by the client.
+        * The client should return 1 to continue logging, 0 to stop
+        * logging.
         *
-        * See Documentation/filesystems/relayfs.txt for details.
-        */
-       int (*buffer_start) (int rchan_id,
-                            char *current_write_pos,
-                            u32 buffer_id,
-                            struct timeval start_time,
-                            u32 start_tsc,
-                            int using_tsc);
-
-       /*
-        * buffer_end - called at the end of a sub-buffer
-        * @rchan_id: the channel id
-        * @current_write_pos: position in sub-buffer of end of data
-        * @end_of_buffer: the position of the end of the sub-buffer
-        * @end_time: the timestamp associated with the end of the sub-buffer
-        * @end_tsc: the TSC associated with the end_time, if using_tsc
-        * @using_tsc: boolean, indicates whether end_tsc is valid
+        * NOTE: subbuf_start will also be invoked when the buffer is
+        *       created, so that the first sub-buffer can be initialized
+        *       if necessary.  In this case, prev_subbuf will be NULL.
         *
-        * Return value should be the number of bytes written by the client.
-        *
-        * See Documentation/filesystems/relayfs.txt for details.
+        * NOTE: the client can reserve bytes at the beginning of the new
+        *       sub-buffer by calling subbuf_start_reserve() in this callback.
         */
-       int (*buffer_end) (int rchan_id,
-                          char *current_write_pos,
-                          char *end_of_buffer,
-                          struct timeval end_time,
-                          u32 end_tsc,
-                          int using_tsc);
+       int (*subbuf_start) (struct rchan_buf *buf,
+                            void *subbuf,
+                            void *prev_subbuf,
+                            size_t prev_padding);
 
        /*
-        * deliver - called when data is ready for the client
-        * @rchan_id: the channel id
-        * @from: the start of the delivered data
-        * @len: the length of the delivered data
+        * buf_mapped - relayfs buffer mmap notification
+        * @buf: the channel buffer
+        * @filp: relayfs file pointer
         *
-        * See Documentation/filesystems/relayfs.txt for details.
+        * Called when a relayfs file is successfully mmapped
         */
-       void (*deliver) (int rchan_id, char *from, u32 len);
+        void (*buf_mapped)(struct rchan_buf *buf,
+                          struct file *filp);
 
        /*
-        * user_deliver - called when data has been written from userspace
-        * @rchan_id: the channel id
-        * @from: the start of the delivered data
-        * @len: the length of the delivered data
+        * buf_unmapped - relayfs buffer unmap notification
+        * @buf: the channel buffer
+        * @filp: relayfs file pointer
         *
-        * See Documentation/filesystems/relayfs.txt for details.
+        * Called when a relayfs file is successfully unmapped
         */
-       void (*user_deliver) (int rchan_id, char *from, u32 len);
-
+        void (*buf_unmapped)(struct rchan_buf *buf,
+                            struct file *filp);
        /*
-        * needs_resize - called when a resizing event occurs
-        * @rchan_id: the channel id
-        * @resize_type: the type of resizing event
-        * @suggested_buf_size: the suggested new sub-buffer size
-        * @suggested_buf_size: the suggested new number of sub-buffers
+        * create_buf_file - create file to represent a relayfs channel buffer
+        * @filename: the name of the file to create
+        * @parent: the parent of the file to create
+        * @mode: the mode of the file to create
+        * @buf: the channel buffer
+        * @is_global: outparam - set non-zero if the buffer should be global
         *
-        * See Documentation/filesystems/relayfs.txt for details.
-        */
-       void (*needs_resize)(int rchan_id,
-                            int resize_type,
-                            u32 suggested_buf_size,
-                            u32 suggested_n_bufs);
-
-       /*
-        * fileop_notify - called on open/close/mmap/munmap of a relayfs file
-        * @rchan_id: the channel id
-        * @filp: relayfs file pointer
-        * @fileop: which file operation is in progress
+        * Called during relay_open(), once for each per-cpu buffer,
+        * to allow the client to create a file to be used to
+        * represent the corresponding channel buffer.  If the file is
+        * created outside of relayfs, the parent must also exist in
+        * that filesystem.
+        *
+        * The callback should return the dentry of the file created
+        * to represent the relay buffer.
         *
-        * The return value can direct the outcome of the operation.
+        * Setting the is_global outparam to a non-zero value will
+        * cause relay_open() to create a single global buffer rather
+        * than the default set of per-cpu buffers.
         *
-        * See Documentation/filesystems/relayfs.txt for details.
+        * See Documentation/filesystems/relayfs.txt for more info.
         */
-        int (*fileop_notify)(int rchan_id,
-                            struct file *filp,
-                            enum relay_fileop fileop);
+       struct dentry *(*create_buf_file)(const char *filename,
+                                         struct dentry *parent,
+                                         int mode,
+                                         struct rchan_buf *buf,
+                                         int *is_global);
 
        /*
-        * ioctl - called in ioctl context from userspace
-        * @rchan_id: the channel id
-        * @cmd: ioctl cmd
-        * @arg: ioctl cmd arg
+        * remove_buf_file - remove file representing a relayfs channel buffer
+        * @dentry: the dentry of the file to remove
         *
-        * The return value is returned as the value from the ioctl call.
+        * Called during relay_close(), once for each per-cpu buffer,
+        * to allow the client to remove a file used to represent a
+        * channel buffer.
         *
-        * See Documentation/filesystems/relayfs.txt for details.
+        * The callback should return 0 if successful, negative if not.
         */
-       int (*ioctl) (int rchan_id, unsigned int cmd, unsigned long arg);
+       int (*remove_buf_file)(struct dentry *dentry);
 };
 
 /*
- * Lockless scheme-specific data
+ * relayfs kernel API, fs/relayfs/relay.c
  */
-struct lockless_rchan
-{
-       u8 bufno_bits;          /* # bits used for sub-buffer id */
-       u8 offset_bits;         /* # bits used for offset within sub-buffer */
-       u32 index;              /* current index = sub-buffer id and offset */
-       u32 offset_mask;        /* used to obtain offset portion of index */
-       u32 index_mask;         /* used to mask off unused bits index */
-       atomic_t fill_count[RELAY_MAX_BUFS];    /* fill count per sub-buffer */
-};
-
-/*
- * Locking scheme-specific data
- */
-struct locking_rchan
-{
-       char *write_buf;                /* start of write sub-buffer */
-       char *write_buf_end;            /* end of write sub-buffer */
-       char *current_write_pos;        /* current write pointer */
-       char *write_limit;              /* takes reserves into account */
-       char *in_progress_event_pos;    /* used for interrupted writes */
-       u16 in_progress_event_size;     /* used for interrupted writes */
-       char *interrupted_pos;          /* used for interrupted writes */
-       u16 interrupting_size;          /* used for interrupted writes */
-       spinlock_t lock;                /* channel lock for locking scheme */
-};
-
-struct relay_ops;
-
-/*
- * Offset resizing data structure
- */
-struct resize_offset
-{
-       u32 ge;
-       u32 le;
-       int delta;
-};
-
-/*
- * Relay channel data structure
- */
-struct rchan
-{
-       u32 version;                    /* the version of this struct */
-       char *buf;                      /* the channel buffer */
-       union
-       {
-               struct lockless_rchan lockless;
-               struct locking_rchan locking;
-       } scheme;                       /* scheme-specific channel data */
-
-       int id;                         /* the channel id */
-       struct rchan_callbacks *callbacks;      /* client callbacks */
-       u32 flags;                      /* relay channel attributes */
-       u32 buf_id;                     /* current sub-buffer id */
-       u32 buf_idx;                    /* current sub-buffer index */
-
-       atomic_t mapped;                /* map count */
-
-       atomic_t suspended;             /* channel suspended i.e full? */
-       int half_switch;                /* used internally for suspend */
-
-       struct timeval  buf_start_time; /* current sub-buffer start time */
-       u32 buf_start_tsc;              /* current sub-buffer start TSC */
-       
-       u32 buf_size;                   /* sub-buffer size */
-       u32 alloc_size;                 /* total buffer size allocated */
-       u32 n_bufs;                     /* number of sub-buffers */
-
-       u32 bufs_produced;              /* count of sub-buffers produced */
-       u32 bufs_consumed;              /* count of sub-buffers consumed */
-       u32 bytes_consumed;             /* bytes consumed in cur sub-buffer */
-
-       int initialized;                /* first buffer initialized? */
-       int finalized;                  /* channel finalized? */
-
-       u32 start_reserve;              /* reserve at start of sub-buffers */
-       u32 end_reserve;                /* reserve at end of sub-buffers */
-       u32 rchan_start_reserve;        /* additional reserve sub-buffer 0 */
-       
-       struct dentry *dentry;          /* channel file dentry */
-
-       wait_queue_head_t read_wait;    /* VFS read wait queue */
-       wait_queue_head_t write_wait;   /* VFS write wait queue */
-       struct work_struct wake_readers; /* reader wake-up work struct */
-       struct work_struct wake_writers; /* reader wake-up work struct */
-       atomic_t refcount;              /* channel refcount */
-
-       struct relay_ops *relay_ops;    /* scheme-specific channel ops */
-
-       int unused_bytes[RELAY_MAX_BUFS]; /* unused count per sub-buffer */
-
-       struct semaphore resize_sem;    /* serializes alloc/repace */
-       struct work_struct work;        /* resize allocation work struct */
-
-       struct list_head open_readers;  /* open readers for this channel */
-       rwlock_t open_readers_lock;     /* protection for open_readers list */
-
-       char *init_buf;                 /* init channel buffer, if non-NULL */
-       
-       u32 resize_min;                 /* minimum resized total buffer size */
-       u32 resize_max;                 /* maximum resized total buffer size */
-       char *resize_buf;               /* for autosize alloc/free */
-       u32 resize_buf_size;            /* resized sub-buffer size */
-       u32 resize_n_bufs;              /* resized number of sub-buffers */
-       u32 resize_alloc_size;          /* resized actual total size */
-       int resizing;                   /* is resizing in progress? */
-       int resize_err;                 /* resizing err code */
-       int resize_failures;            /* number of resize failures */
-       int replace_buffer;             /* is the alloced buffer ready?  */
-       struct resize_offset resize_offset; /* offset change */
-       struct timer_list shrink_timer; /* timer used for shrinking */
-       int resize_order;               /* size of last resize */
-       u32 expand_buf_id;              /* subbuf id expand will occur at */
-
-       struct page **buf_page_array;   /* array of current buffer pages */
-       int buf_page_count;             /* number of current buffer pages */
-       struct page **expand_page_array;/* new pages to be inserted */
-       int expand_page_count;          /* number of new pages */
-       struct page **shrink_page_array;/* old pages to be freed */
-       int shrink_page_count;          /* number of old pages */
-       struct page **resize_page_array;/* will become current pages */
-       int resize_page_count;          /* number of resize pages */
-       struct page **old_buf_page_array; /* hold for freeing */
-} ____cacheline_aligned;
-
-/*
- * Relay channel reader struct
- */
-struct rchan_reader
-{
-       struct list_head list;          /* for list inclusion */
-       struct rchan *rchan;            /* the channel we're reading from */
-       int auto_consume;               /* does this reader auto-consume? */
-       u32 bufs_consumed;              /* buffers this reader has consumed */
-       u32 bytes_consumed;             /* bytes consumed in cur sub-buffer */
-       int offset_changed;             /* have channel offsets changed? */
-       int vfs_reader;                 /* are we a VFS reader? */
-       int map_reader;                 /* are we an mmap reader? */
-
-       union
-       {
-               struct file *file;
-               u32 f_pos;
-       } pos;                          /* current read offset */
-};
-
-/*
- * These help make union member access less tedious
- */
-#define channel_buffer(rchan) ((rchan)->buf)
-#define idx(rchan) ((rchan)->scheme.lockless.index)
-#define bufno_bits(rchan) ((rchan)->scheme.lockless.bufno_bits)
-#define offset_bits(rchan) ((rchan)->scheme.lockless.offset_bits)
-#define offset_mask(rchan) ((rchan)->scheme.lockless.offset_mask)
-#define idx_mask(rchan) ((rchan)->scheme.lockless.index_mask)
-#define bulk_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_BULK) ? 1 : 0)
-#define packet_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_PACKET) ? 1 : 0)
-#define using_lockless(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKLESS) ? 1 : 0)
-#define using_locking(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKING) ? 1 : 0)
-#define using_tsc(rchan) (((rchan)->flags & RELAY_TIMESTAMP_TSC) ? 1 : 0)
-#define using_gettimeofday(rchan) (((rchan)->flags & RELAY_TIMESTAMP_GETTIMEOFDAY) ? 1 : 0)
-#define usage_smp(rchan) (((rchan)->flags & RELAY_USAGE_SMP) ? 1 : 0)
-#define usage_global(rchan) (((rchan)->flags & RELAY_USAGE_GLOBAL) ? 1 : 0)
-#define mode_continuous(rchan) (((rchan)->flags & RELAY_MODE_CONTINUOUS) ? 1 : 0)
-#define fill_count(rchan, i) ((rchan)->scheme.lockless.fill_count[(i)])
-#define write_buf(rchan) ((rchan)->scheme.locking.write_buf)
-#define read_buf(rchan) ((rchan)->scheme.locking.read_buf)
-#define write_buf_end(rchan) ((rchan)->scheme.locking.write_buf_end)
-#define read_buf_end(rchan) ((rchan)->scheme.locking.read_buf_end)
-#define cur_write_pos(rchan) ((rchan)->scheme.locking.current_write_pos)
-#define read_limit(rchan) ((rchan)->scheme.locking.read_limit)
-#define write_limit(rchan) ((rchan)->scheme.locking.write_limit)
-#define in_progress_event_pos(rchan) ((rchan)->scheme.locking.in_progress_event_pos)
-#define in_progress_event_size(rchan) ((rchan)->scheme.locking.in_progress_event_size)
-#define interrupted_pos(rchan) ((rchan)->scheme.locking.interrupted_pos)
-#define interrupting_size(rchan) ((rchan)->scheme.locking.interrupting_size)
-#define channel_lock(rchan) ((rchan)->scheme.locking.lock)
 
+struct rchan *relay_open(const char *base_filename,
+                        struct dentry *parent,
+                        size_t subbuf_size,
+                        size_t n_subbufs,
+                        struct rchan_callbacks *cb);
+extern void relay_close(struct rchan *chan);
+extern void relay_flush(struct rchan *chan);
+extern void relay_subbufs_consumed(struct rchan *chan,
+                                  unsigned int cpu,
+                                  size_t consumed);
+extern void relay_reset(struct rchan *chan);
+extern int relay_buf_full(struct rchan_buf *buf);
+
+extern size_t relay_switch_subbuf(struct rchan_buf *buf,
+                                 size_t length);
+extern struct dentry *relayfs_create_dir(const char *name,
+                                        struct dentry *parent);
+extern int relayfs_remove_dir(struct dentry *dentry);
+extern struct dentry *relayfs_create_file(const char *name,
+                                         struct dentry *parent,
+                                         int mode,
+                                         struct file_operations *fops,
+                                         void *data);
+extern int relayfs_remove_file(struct dentry *dentry);
 
 /**
- *     calc_time_delta - utility function for time delta calculation
- *     @now: current time
- *     @start: start time
+ *     relay_write - write data into the channel
+ *     @chan: relay channel
+ *     @data: data to be written
+ *     @length: number of bytes to write
+ *
+ *     Writes data into the current cpu's channel buffer.
  *
- *     Returns the time delta produced by subtracting start time from now.
+ *     Protects the buffer by disabling interrupts.  Use this
+ *     if you might be logging from interrupt context.  Use
+ *     __relay_write() instead if you know you won't be logging
+ *     from interrupt context.
  */
-static inline u32
-calc_time_delta(struct timeval *now, 
-               struct timeval *start)
+static inline void relay_write(struct rchan *chan,
+                              const void *data,
+                              size_t length)
 {
-       return (now->tv_sec - start->tv_sec) * 1000000
-               + (now->tv_usec - start->tv_usec);
+       unsigned long flags;
+       struct rchan_buf *buf;
+
+       local_irq_save(flags);
+       buf = chan->buf[smp_processor_id()];
+       if (unlikely(buf->offset + length > chan->subbuf_size))
+               length = relay_switch_subbuf(buf, length);
+       memcpy(buf->data + buf->offset, data, length);
+       buf->offset += length;
+       local_irq_restore(flags);
 }
 
 /**
- *     recalc_time_delta - utility function for time delta recalculation
- *     @now: current time
- *     @new_delta: the new time delta calculated
- *     @cpu: the associated CPU id
+ *     __relay_write - write data into the channel, without disabling interrupts
+ *     @chan: relay channel
+ *     @data: data to be written
+ *     @length: number of bytes to write
+ *
+ *     Writes data into the current cpu's channel buffer.
+ *
+ *     Protects the buffer by disabling preemption.  Use
+ *     relay_write() if you might be logging from interrupt
+ *     context.
  */
-static inline void 
-recalc_time_delta(struct timeval *now,
-                 u32 *new_delta,
-                 struct rchan *rchan)
+static inline void __relay_write(struct rchan *chan,
+                                const void *data,
+                                size_t length)
 {
-       if (using_tsc(rchan) == 0)
-               *new_delta = calc_time_delta(now, &rchan->buf_start_time);
+       struct rchan_buf *buf;
+
+       buf = chan->buf[get_cpu()];
+       if (unlikely(buf->offset + length > buf->chan->subbuf_size))
+               length = relay_switch_subbuf(buf, length);
+       memcpy(buf->data + buf->offset, data, length);
+       buf->offset += length;
+       put_cpu();
 }
 
 /**
- *     have_cmpxchg - does this architecture have a cmpxchg?
+ *     relay_reserve - reserve slot in channel buffer
+ *     @chan: relay channel
+ *     @length: number of bytes to reserve
+ *
+ *     Returns pointer to reserved slot, NULL if full.
  *
- *     Returns 1 if this architecture has a cmpxchg useable by 
- *     the lockless scheme, 0 otherwise.
+ *     Reserves a slot in the current cpu's channel buffer.
+ *     Does not protect the buffer at all - caller must provide
+ *     appropriate synchronization.
  */
-static inline int 
-have_cmpxchg(void)
+static inline void *relay_reserve(struct rchan *chan, size_t length)
 {
-#if defined(__HAVE_ARCH_CMPXCHG)
-       return 1;
-#else
-       return 0;
-#endif
+       void *reserved;
+       struct rchan_buf *buf = chan->buf[smp_processor_id()];
+
+       if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
+               length = relay_switch_subbuf(buf, length);
+               if (!length)
+                       return NULL;
+       }
+       reserved = buf->data + buf->offset;
+       buf->offset += length;
+
+       return reserved;
 }
 
 /**
- *     relay_write_direct - write data directly into destination buffer
- */
-#define relay_write_direct(DEST, SRC, SIZE) \
-do\
-{\
-   memcpy(DEST, SRC, SIZE);\
-   DEST += SIZE;\
-} while (0);
-
-/**
- *     relay_lock_channel - lock the relay channel if applicable
+ *     subbuf_start_reserve - reserve bytes at the start of a sub-buffer
+ *     @buf: relay channel buffer
+ *     @length: number of bytes to reserve
  *
- *     This macro only affects the locking scheme.  If the locking scheme
- *     is in use and the channel usage is SMP, does a local_irq_save.  If the 
- *     locking sheme is in use and the channel usage is GLOBAL, uses 
- *     spin_lock_irqsave.  FLAGS is initialized to 0 since we know that
- *     it is being initialized prior to use and we avoid the compiler warning.
- */
-#define relay_lock_channel(RCHAN, FLAGS) \
-do\
-{\
-   FLAGS = 0;\
-   if (using_locking(RCHAN)) {\
-      if (usage_smp(RCHAN)) {\
-         local_irq_save(FLAGS); \
-      } else {\
-         spin_lock_irqsave(&(RCHAN)->scheme.locking.lock, FLAGS); \
-      }\
-   }\
-} while (0);
-
-/**
- *     relay_unlock_channel - unlock the relay channel if applicable
- *
- *     This macro only affects the locking scheme.  See relay_lock_channel.
- */
-#define relay_unlock_channel(RCHAN, FLAGS) \
-do\
-{\
-   if (using_locking(RCHAN)) {\
-      if (usage_smp(RCHAN)) {\
-         local_irq_restore(FLAGS); \
-      } else {\
-         spin_unlock_irqrestore(&(RCHAN)->scheme.locking.lock, FLAGS); \
-      }\
-   }\
-} while (0);
-
-/*
- * Define cmpxchg if we don't have it
- */
-#ifndef __HAVE_ARCH_CMPXCHG
-#define cmpxchg(p,o,n) 0
-#endif
-
-/*
- * High-level relayfs kernel API, fs/relayfs/relay.c
- */
-extern int
-relay_open(const char *chanpath,
-          int bufsize,
-          int nbufs,
-          u32 flags,
-          struct rchan_callbacks *channel_callbacks,
-          u32 start_reserve,
-          u32 end_reserve,
-          u32 rchan_start_reserve,
-          u32 resize_min,
-          u32 resize_max,
-          int mode,
-          char *init_buf,
-          u32 init_buf_size);
-
-extern int
-relay_close(int rchan_id);
-
-extern int
-relay_write(int rchan_id,
-           const void *data_ptr, 
-           size_t count,
-           int td_offset,
-           void **wrote_pos);
-
-extern ssize_t
-relay_read(struct rchan_reader *reader,
-          char *buf,
-          size_t count,
-          int wait,
-          u32 *actual_read_offset);
-
-extern int
-relay_discard_init_buf(int rchan_id);
-
-extern struct rchan_reader *
-add_rchan_reader(int rchan_id, int autoconsume);
-
-extern int
-remove_rchan_reader(struct rchan_reader *reader);
-
-extern struct rchan_reader *
-add_map_reader(int rchan_id);
-
-extern int
-remove_map_reader(struct rchan_reader *reader);
-
-extern int 
-relay_info(int rchan_id, struct rchan_info *rchan_info);
-
-extern void 
-relay_buffers_consumed(struct rchan_reader *reader, u32 buffers_consumed);
-
-extern void
-relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset);
-
-extern ssize_t
-relay_bytes_avail(struct rchan_reader *reader);
-
-extern int
-relay_realloc_buffer(int rchan_id, u32 new_nbufs, int in_background);
-
-extern int
-relay_replace_buffer(int rchan_id);
-
-extern int
-rchan_empty(struct rchan_reader *reader);
-
-extern int
-rchan_full(struct rchan_reader *reader);
-
-extern void
-update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed);
-
-extern int 
-__relay_mmap_buffer(struct rchan *rchan, struct vm_area_struct *vma);
-
-extern struct rchan_reader *
-__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader);
-
-extern void
-__remove_rchan_reader(struct rchan_reader *reader);
-
-/*
- * Low-level relayfs kernel API, fs/relayfs/relay.c
+ *     Helper function used to reserve bytes at the beginning of
+ *     a sub-buffer in the subbuf_start() callback.
  */
-extern struct rchan *
-rchan_get(int rchan_id);
-
-extern void
-rchan_put(struct rchan *rchan);
-
-extern char *
-relay_reserve(struct rchan *rchan,
-             u32 data_len,
-             struct timeval *time_stamp,
-             u32 *time_delta,
-             int *errcode,
-             int *interrupting);
-
-extern void 
-relay_commit(struct rchan *rchan,
-            char *from, 
-            u32 len, 
-            int reserve_code,
-            int interrupting);
-
-extern u32 
-relay_get_offset(struct rchan *rchan, u32 *max_offset);
-
-extern int
-relay_reset(int rchan_id);
-
-/*
- * VFS functions, fs/relayfs/inode.c
- */
-extern int 
-relayfs_create_dir(const char *name, 
-                  struct dentry *parent, 
-                  struct dentry **dentry);
-
-extern int
-relayfs_create_file(const char * name,
-                   struct dentry *parent, 
-                   struct dentry **dentry,
-                   void * data,
-                   int mode);
-
-extern int 
-relayfs_remove_file(struct dentry *dentry);
-
-extern int
-reset_index(struct rchan *rchan, u32 old_index);
-
-
-/*
- * klog functions, fs/relayfs/klog.c
- */
-extern int
-create_klog_channel(void);
-
-extern int
-remove_klog_channel(void);
+static inline void subbuf_start_reserve(struct rchan_buf *buf,
+                                       size_t length)
+{
+       BUG_ON(length >= buf->chan->subbuf_size - 1);
+       buf->offset = length;
+}
 
 /*
- * Scheme-specific channel ops
+ * exported relay file operations, fs/relayfs/inode.c
  */
-struct relay_ops
-{
-       char * (*reserve) (struct rchan *rchan,
-                          u32 slot_len,
-                          struct timeval *time_stamp,
-                          u32 *tsc,
-                          int * errcode,
-                          int * interrupting);
-       
-       void (*commit) (struct rchan *rchan,
-                       char *from,
-                       u32 len, 
-                       int deliver, 
-                       int interrupting);
-
-       u32 (*get_offset) (struct rchan *rchan,
-                          u32 *max_offset);
-       
-       void (*resume) (struct rchan *rchan);
-       void (*finalize) (struct rchan *rchan);
-       void (*reset) (struct rchan *rchan,
-                      int init);
-       int (*reset_index) (struct rchan *rchan,
-                           u32 old_index);
-};
+extern struct file_operations relay_file_operations;
 
 #endif /* _LINUX_RELAYFS_FS_H */
 
-
-
-
-