* Occurs in several places in the IPC code.
* Chris Evans, <chris@ferret.lmh.ox.ac.uk>
* Nov 1999 - ipc helper functions, unified SMP locking
- * Manfred Spraul <manfreds@colorfullife.com>
+ * Manfred Spraul <manfred@colorfullife.com>
* Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
* Mingming Cao <cmm@us.ibm.com>
*/
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/capability.h>
#include <linux/highuid.h>
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
-
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/unistd.h>
#include "util.h"
+struct ipc_proc_iface {
+ const char *path;
+ const char *header;
+ struct ipc_ids *ids;
+ int (*show)(struct seq_file *, void *);
+};
+
/**
* ipc_init - initialise IPC subsystem
*
if(size > IPCMNI)
size = IPCMNI;
- ids->size = size;
ids->in_use = 0;
ids->max_id = -1;
ids->seq = 0;
ids->seq_max = seq_limit;
}
- ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size);
+ ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
+ sizeof(struct ipc_id_ary));
if(ids->entries == NULL) {
printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
- ids->size = 0;
+ size = 0;
+ ids->entries = &ids->nullentry;
}
- for(i=0;i<ids->size;i++)
- ids->entries[i].p = NULL;
+ ids->entries->size = size;
+ for(i=0;i<size;i++)
+ ids->entries->p[i] = NULL;
}
+#ifdef CONFIG_PROC_FS
+static struct file_operations sysvipc_proc_fops;
+/**
+ * ipc_init_proc_interface - Create a proc interface for sysipc types
+ * using a seq_file interface.
+ * @path: Path in procfs
+ * @header: Banner to be printed at the beginning of the file.
+ * @ids: ipc id table to iterate.
+ * @show: show routine.
+ */
+void __init ipc_init_proc_interface(const char *path, const char *header,
+ struct ipc_ids *ids,
+ int (*show)(struct seq_file *, void *))
+{
+ struct proc_dir_entry *pde;
+ struct ipc_proc_iface *iface;
+
+ /* best-effort: on any failure below the proc file is silently unavailable */
+ iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+ if (!iface)
+ return;
+ iface->path = path;
+ iface->header = header;
+ iface->ids = ids;
+ iface->show = show;
+
+ /* world-readable entry; on success ownership of iface passes to pde->data */
+ pde = create_proc_entry(path,
+ S_IRUGO, /* world readable */
+ NULL /* parent dir */);
+ if (pde) {
+ pde->data = iface;
+ pde->proc_fops = &sysvipc_proc_fops;
+ } else {
+ kfree(iface);
+ }
+}
+#endif
+
/**
* ipc_findkey - find a key in an ipc identifier set
* @ids: Identifier set
int max_id = ids->max_id;
/*
- * read_barrier_depends is not needed here
+ * rcu_dereference() is not needed here
* since ipc_ids.sem is held
*/
for (id = 0; id <= max_id; id++) {
- p = ids->entries[id].p;
+ p = ids->entries->p[id];
if (p==NULL)
continue;
if (!vx_check(p->xid, VX_IDENT))
- continue;
+ continue;
if (key == p->key)
return id;
}
*/
static int grow_ary(struct ipc_ids* ids, int newsize)
{
- struct ipc_id* new;
- struct ipc_id* old;
+ struct ipc_id_ary* new;
+ struct ipc_id_ary* old;
int i;
+ /* snapshot of the current array size; callers hold ipc_ids.sem
+ * (see the "ipc_ids.sem is held" comments elsewhere in this file) */
+ int size = ids->entries->size;
if(newsize > IPCMNI)
newsize = IPCMNI;
- if(newsize <= ids->size)
+ if(newsize <= size)
return newsize;
- new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize);
+ new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+ sizeof(struct ipc_id_ary));
if(new == NULL)
- return ids->size;
- memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size);
- for(i=ids->size;i<newsize;i++) {
- new[i].p = NULL;
+ return size;
+ new->size = newsize;
+ memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
+ for(i=size;i<newsize;i++) {
+ new->p[i] = NULL;
}
old = ids->entries;
- i = ids->size;
/*
- * before setting the ids->entries to the new array, there must be a
- * smp_wmb() to make sure the memcpyed contents of the new array are
- * visible before the new array becomes visible.
+ * Use rcu_assign_pointer() to make sure the memcpyed contents
+ * of the new array are visible before the new array becomes visible.
 */
- smp_wmb(); /* prevent seeing new array uninitialized. */
- ids->entries = new;
- smp_wmb(); /* prevent indexing into old array based on new size. */
- ids->size = newsize;
+ rcu_assign_pointer(ids->entries, new);
- ipc_rcu_free(old, sizeof(struct ipc_id)*i);
- return ids->size;
+ ipc_rcu_putref(old);
+ return newsize;
}
/**
size = grow_ary(ids,size);
/*
- * read_barrier_depends() is not needed here since
+ * rcu_dereference() is not needed here since
* ipc_ids.sem is held
*/
for (id = 0; id < size; id++) {
- if(ids->entries[id].p == NULL)
+ if(ids->entries->p[id] == NULL)
goto found;
}
return -1;
if(ids->seq > ids->seq_max)
ids->seq = 0;
- new->lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&new->lock);
new->deleted = 0;
rcu_read_lock();
spin_lock(&new->lock);
- ids->entries[id].p = new;
+ ids->entries->p[id] = new;
return id;
}
{
struct kern_ipc_perm* p;
int lid = id % SEQ_MULTIPLIER;
- if(lid >= ids->size)
+ if(lid >= ids->entries->size)
BUG();
/*
- * do not need a read_barrier_depends() here to force ordering
+ * do not need a rcu_dereference() here to force ordering
* on Alpha, since the ipc_ids.sem is held.
*/
- p = ids->entries[lid].p;
- ids->entries[lid].p = NULL;
+ p = ids->entries->p[lid];
+ ids->entries->p[lid] = NULL;
if(p==NULL)
BUG();
ids->in_use--;
lid--;
if(lid == -1)
break;
- } while (ids->entries[lid].p == NULL);
+ } while (ids->entries->p[lid] == NULL);
ids->max_id = lid;
}
p->deleted = 1;
kfree(ptr);
}
-struct ipc_rcu_kmalloc
+/*
+ * rcu allocations:
+ * There are three headers that are prepended to the actual allocation:
+ * - during use: ipc_rcu_hdr.
+ * - during the rcu grace period: ipc_rcu_grace.
+ * - [only if vmalloc]: ipc_rcu_sched.
+ * Their lifetime doesn't overlap, thus the headers share the same memory.
+ * Unlike a normal union, they are right-aligned, thus some container_of
+ * forward/backward casting is necessary:
+ */
+struct ipc_rcu_hdr
+{
+ int refcount;
+ int is_vmalloc;
+ void *data[0];
+};
+
+
+struct ipc_rcu_grace
{
struct rcu_head rcu;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
-struct ipc_rcu_vmalloc
+struct ipc_rcu_sched
{
- struct rcu_head rcu;
struct work_struct work;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
+/* effective header length: the largest header that may occupy the slot */
+#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
+ sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
+#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
+ sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
+
+/* choose the allocator: vmalloc when header + payload exceed one page */
static inline int rcu_use_vmalloc(int size)
{
/* Too big for a single page? */
- if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE)
+ if (HDRLEN_KMALLOC + size > PAGE_SIZE)
return 1;
return 0;
}
* workqueue if necessary (for vmalloc).
*/
if (rcu_use_vmalloc(size)) {
- out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
- if (out) out += sizeof(struct ipc_rcu_vmalloc);
+ out = vmalloc(HDRLEN_VMALLOC + size);
+ if (out) {
+ out += HDRLEN_VMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
+ container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+ }
} else {
- out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
- if (out) out += sizeof(struct ipc_rcu_kmalloc);
+ out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+ if (out) {
+ out += HDRLEN_KMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
+ container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+ }
}
return out;
}
+/* take an additional reference on an ipc_rcu_alloc()ed object.
+ * NOTE(review): refcount is a plain int, not atomic — callers presumably
+ * serialize via the object's lock or ipc_ids.sem; TODO confirm */
+void ipc_rcu_getref(void *ptr)
+{
+ container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+}
+
/**
- * ipc_schedule_free - free ipc + rcu space
+ * ipc_schedule_free - free ipc + rcu space
+ * @head: RCU callback structure for queued work
*
* Since RCU callback function is called in bh,
* we need to defer the vfree to schedule_work
*/
-static void ipc_schedule_free(void* arg)
+static void ipc_schedule_free(struct rcu_head *head)
{
- struct ipc_rcu_vmalloc *free = arg;
+ struct ipc_rcu_grace *grace =
+ container_of(head, struct ipc_rcu_grace, rcu);
+ struct ipc_rcu_sched *sched =
+ container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+ /* grace and sched headers occupy the same (right-aligned) memory;
+ * see the "rcu allocations" comment above the header structs */
- INIT_WORK(&free->work, vfree, free);
- schedule_work(&free->work);
+ INIT_WORK(&sched->work, vfree, sched);
+ schedule_work(&sched->work);
}
-void ipc_rcu_free(void* ptr, int size)
+/**
+ * ipc_immediate_free - free ipc + rcu space
+ * @head: RCU callback structure that contains pointer to be freed
+ *
+ * Free from the RCU callback context
+ */
+static void ipc_immediate_free(struct rcu_head *head)
{
- if (rcu_use_vmalloc(size)) {
- struct ipc_rcu_vmalloc *free;
- free = ptr - sizeof(*free);
- call_rcu(&free->rcu, ipc_schedule_free, free);
+ struct ipc_rcu_grace *free =
+ container_of(head, struct ipc_rcu_grace, rcu);
+ kfree(free);
+}
+
+/* drop a reference; the final put frees the object after an RCU grace
+ * period (deferred to a workqueue for vmalloc'ed objects, since vfree
+ * cannot run from the RCU softirq callback) */
+void ipc_rcu_putref(void *ptr)
+{
+ if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+ return;
+
+ if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
+ call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+ ipc_schedule_free);
} else {
- struct ipc_rcu_kmalloc *free;
- free = ptr - sizeof(*free);
- /* kfree takes a "const void *" so gcc warns. So we cast. */
- call_rcu(&free->rcu, (void (*)(void *))kfree, free);
+ call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+ ipc_immediate_free);
}
-
}
/**
granted_mode >>= 3;
/* is there some bit set in requested_mode but not in granted_mode? */
if ((requested_mode & ~granted_mode & 0007) &&
- !capable(CAP_IPC_OWNER)) {
- if (!can_do_mlock()) {
- return -1;
- }
- }
+ !capable(CAP_IPC_OWNER))
+ return -1;
return security_ipc_permission(ipcp, flag);
}
{
struct kern_ipc_perm* out;
int lid = id % SEQ_MULTIPLIER;
- if(lid >= ids->size)
+ if(lid >= ids->entries->size)
return NULL;
- out = ids->entries[lid].p;
+ out = ids->entries->p[lid];
return out;
}
{
struct kern_ipc_perm* out;
int lid = id % SEQ_MULTIPLIER;
- struct ipc_id* entries;
+ struct ipc_id_ary* entries;
rcu_read_lock();
- if(lid >= ids->size) {
+ entries = rcu_dereference(ids->entries);
+ if(lid >= entries->size) {
rcu_read_unlock();
return NULL;
}
-
- /*
- * Note: The following two read barriers are corresponding
- * to the two write barriers in grow_ary(). They guarantee
- * the writes are seen in the same order on the read side.
- * smp_rmb() has effect on all CPUs. read_barrier_depends()
- * is used if there are data dependency between two reads, and
- * has effect only on Alpha.
- */
- smp_rmb(); /* prevent indexing old array with new size */
- entries = ids->entries;
- read_barrier_depends(); /*prevent seeing new array unitialized */
- out = entries[lid].p;
+ out = entries->p[lid];
if(out == NULL) {
rcu_read_unlock();
return NULL;
return out;
}
+/* enter the RCU read section and take the per-object spinlock for an
+ * ipc object the caller already holds a valid pointer to */
+void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+ rcu_read_lock();
+ spin_lock(&perm->lock);
+}
+
+/* drop the per-object spinlock taken by ipc_lock_by_ptr() */
void ipc_unlock(struct kern_ipc_perm* perm)
{
spin_unlock(&perm->lock);
}
#endif /* __ARCH_WANT_IPC_PARSE_VERSION */
+
+#ifdef CONFIG_PROC_FS
+/* seq_file ->next: unlock the previous object, then return the next
+ * existing ipc id, locked via ipc_lock() */
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+ struct ipc_proc_iface *iface = s->private;
+ struct kern_ipc_perm *ipc = it;
+ loff_t p;
+
+ /* If we had an ipc id locked before, unlock it */
+ if (ipc && ipc != SEQ_START_TOKEN)
+ ipc_unlock(ipc);
+
+ /*
+ * p = *pos - 1 (because id 0 starts at position 1)
+ * + 1 (because we increment the position by one)
+ */
+ for (p = *pos; p <= iface->ids->max_id; p++) {
+ if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+ *pos = p + 1;
+ return ipc;
+ }
+ }
+
+ /* Out of range - return NULL to terminate iteration */
+ return NULL;
+}
+
+/*
+ * File positions: pos 0 -> header, pos n -> ipc id + 1.
+ * SeqFile iterator: iterator value is a locked ipc pointer or SEQ_START_TOKEN.
+ */
+static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
+{
+ struct ipc_proc_iface *iface = s->private;
+ struct kern_ipc_perm *ipc;
+ loff_t p;
+
+ /*
+ * Take the lock - this will be released by the corresponding
+ * call to stop().
+ */
+ down(&iface->ids->sem);
+
+ /* pos < 0 is invalid */
+ if (*pos < 0)
+ return NULL;
+
+ /* pos == 0 means header */
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ /* Find the (pos-1)th ipc */
+ for (p = *pos - 1; p <= iface->ids->max_id; p++) {
+ if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+ *pos = p + 1;
+ return ipc;
+ }
+ }
+ return NULL;
+}
+
+/* seq_file ->stop: release the iterator state acquired in start()/next() */
+static void sysvipc_proc_stop(struct seq_file *s, void *it)
+{
+ struct kern_ipc_perm *ipc = it;
+ struct ipc_proc_iface *iface = s->private;
+
+ /* If we had a locked ipc object, release it */
+ if (ipc && ipc != SEQ_START_TOKEN)
+ ipc_unlock(ipc);
+
+ /* Release the lock we took in start() */
+ up(&iface->ids->sem);
+}
+
+/* seq_file ->show: emit the banner for the header token,
+ * otherwise delegate to the per-type show routine */
+static int sysvipc_proc_show(struct seq_file *s, void *it)
+{
+ struct ipc_proc_iface *iface = s->private;
+
+ if (it == SEQ_START_TOKEN)
+ return seq_puts(s, iface->header);
+
+ return iface->show(s, it);
+}
+
+/* iterator operations shared by all sysvipc proc files */
+static struct seq_operations sysvipc_proc_seqops = {
+ .start = sysvipc_proc_start,
+ .stop = sysvipc_proc_stop,
+ .next = sysvipc_proc_next,
+ .show = sysvipc_proc_show,
+};
+
+/* open: start a seq_file session and attach the per-file ipc_proc_iface
+ * (stashed in the proc entry's ->data by ipc_init_proc_interface()) */
+static int sysvipc_proc_open(struct inode *inode, struct file *file) {
+ int ret;
+ struct seq_file *seq;
+
+ ret = seq_open(file, &sysvipc_proc_seqops);
+ if (!ret) {
+ seq = file->private_data;
+ seq->private = PDE(inode)->data;
+ }
+ return ret;
+}
+
+/* file operations installed on each entry created by ipc_init_proc_interface() */
+static struct file_operations sysvipc_proc_fops = {
+ .open = sysvipc_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+#endif /* CONFIG_PROC_FS */