diff --git a/ipc/util.c b/ipc/util.c
index eb61fb726..43631f5d4 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -7,9 +7,11 @@
  * Occurs in several places in the IPC code.
  * Chris Evans,
  * Nov 1999 - ipc helper functions, unified SMP locking
- *	      Manfred Spraul
+ *	      Manfred Spraul
  * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
  *	      Mingming Cao
+ * Mar 2006 - support for audit of ipc object properties
+ *	      Dustin Kirkland
  */

 #include
@@ -20,16 +22,26 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
+#include
+#include
+#include
 #include

 #include "util.h"

+struct ipc_proc_iface {
+	const char *path;
+	const char *header;
+	struct ipc_ids *ids;
+	int (*show)(struct seq_file *, void *);
+};
+
 /**
  * ipc_init - initialise IPC subsystem
  *
@@ -59,11 +71,11 @@ __initcall(ipc_init);
 void __init ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
-	sema_init(&ids->sem,1);
+
+	mutex_init(&ids->mutex);

 	if(size > IPCMNI)
 		size = IPCMNI;
-	ids->size = size;
 	ids->in_use = 0;
 	ids->max_id = -1;
 	ids->seq = 0;
@@ -75,22 +87,62 @@ void __init ipc_init_ids(struct ipc_ids* ids, int size)
 		ids->seq_max = seq_limit;
 	}

-	ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size);
+	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
+				sizeof(struct ipc_id_ary));

 	if(ids->entries == NULL) {
 		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		ids->size = 0;
+		size = 0;
+		ids->entries = &ids->nullentry;
+	}
+	ids->entries->size = size;
+	for(i=0;i<size;i++)
+		ids->entries->p[i] = NULL;
+}
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations sysvipc_proc_fops;
+/**
+ * ipc_init_proc_interface - Create a proc interface for sysvipc types
+ *			     using a seq_file interface.
+ * @path: Path in procfs
+ * @header: Banner to be printed at the beginning of the file.
+ * @ids: ipc id table to iterate.
+ * @show: show routine.
+ */
+void __init ipc_init_proc_interface(const char *path, const char *header,
+		struct ipc_ids *ids,
+		int (*show)(struct seq_file *, void *))
+{
+	struct proc_dir_entry *pde;
+	struct ipc_proc_iface *iface;
+
+	iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+	if (!iface)
+		return;
+	iface->path = path;
+	iface->header = header;
+	iface->ids = ids;
+	iface->show = show;
+
+	pde = create_proc_entry(path,
+				S_IRUGO,	/* world readable */
+				NULL		/* parent dir */);
+	if (pde) {
+		pde->data = iface;
+		pde->proc_fops = &sysvipc_proc_fops;
+	} else {
+		kfree(iface);
 	}
-	for(i=0;i<ids->size;i++)
-		ids->entries[i].p = NULL;
 }
+#endif

 /**
  * ipc_findkey - find a key in an ipc identifier set
  * @ids: Identifier set
  * @key: The key to find
  *
- * Requires ipc_ids.sem locked.
+ * Requires ipc_ids.mutex locked.
  * Returns the identifier if found or -1 if not.
 */
@@ -101,15 +153,15 @@ int ipc_findkey(struct ipc_ids* ids, key_t key)
 	int max_id = ids->max_id;

 	/*
-	 * read_barrier_depends is not needed here
-	 * since ipc_ids.sem is held
+	 * rcu_dereference() is not needed here
+	 * since ipc_ids.mutex is held
 	 */
 	for (id = 0; id <= max_id; id++) {
-		p = ids->entries[id].p;
+		p = ids->entries->p[id];
 		if (p==NULL)
 			continue;
 		if (!vx_check(p->xid, VX_IDENT))
-			continue;
+			continue;
 		if (key == p->key)
 			return id;
 	}
@@ -117,41 +169,39 @@ int ipc_findkey(struct ipc_ids* ids, key_t key)
 }

 /*
- * Requires ipc_ids.sem locked
+ * Requires ipc_ids.mutex locked
 */
 static int grow_ary(struct ipc_ids* ids, int newsize)
 {
-	struct ipc_id* new;
-	struct ipc_id* old;
+	struct ipc_id_ary* new;
+	struct ipc_id_ary* old;
 	int i;
+	int size = ids->entries->size;

 	if(newsize > IPCMNI)
 		newsize = IPCMNI;
-	if(newsize <= ids->size)
+	if(newsize <= size)
 		return newsize;

-	new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize);
+	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+			sizeof(struct ipc_id_ary));
 	if(new == NULL)
-		return ids->size;
-	memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size);
-	for(i=ids->size;i<newsize;i++) {
-		new[i].p = NULL;
-	}
+		return size;
+	new->size = newsize;
+	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
+	for(i=size;i<newsize;i++) {
+		new->p[i] = NULL;
+	}
 	old = ids->entries;
-	i = ids->size;

 	/*
-	 * before setting the ids->entries to the new array, there must be a
-	 * smp_wmb() to make sure the memcpyed contents of the new array are
-	 * visible before the new array becomes visible.
+	 * Use rcu_assign_pointer() to make sure the memcpyed contents
+	 * of the new array are visible before the new array becomes visible.
 	 */
-	smp_wmb();	/* prevent seeing new array uninitialized. */
-	ids->entries = new;
-	smp_wmb();	/* prevent indexing into old array based on new size. */
-	ids->size = newsize;
+	rcu_assign_pointer(ids->entries, new);

-	ipc_rcu_free(old, sizeof(struct ipc_id)*i);
-	return ids->size;
+	ipc_rcu_putref(old);
+	return newsize;
 }

 /**
@@ -165,7 +215,7 @@ static int grow_ary(struct ipc_ids* ids, int newsize)
 * is returned. The list is returned in a locked state on success.
 * On failure the list is not locked and -1 is returned.
 *
- * Called with ipc_ids.sem held.
+ * Called with ipc_ids.mutex held.
 */
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
@@ -175,11 +225,11 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	size = grow_ary(ids,size);

 	/*
-	 * read_barrier_depends() is not needed here since
-	 * ipc_ids.sem is held
+	 * rcu_dereference() is not needed here since
+	 * ipc_ids.mutex is held
 	 */
 	for (id = 0; id < size; id++) {
-		if(ids->entries[id].p == NULL)
+		if(ids->entries->p[id] == NULL)
 			goto found;
 	}
 	return -1;
@@ -195,11 +245,11 @@ found:
 	if(ids->seq > ids->seq_max)
 		ids->seq = 0;

-	new->lock = SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&new->lock);
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries[id].p = new;
+	ids->entries->p[id] = new;
 	return id;
 }

@@ -212,7 +262,7 @@ found:
 * fed an invalid identifier. The entry is removed and internal
 * variables recomputed. The object associated with the identifier
 * is returned.
- * ipc_ids.sem and the spinlock for this ID is hold before this function
+ * ipc_ids.mutex and the spinlock for this ID are held before this function
 * is called, and remain locked on the exit.
 */
@@ -220,17 +270,15 @@ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* p;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
-		BUG();
+	BUG_ON(lid >= ids->entries->size);

 	/*
-	 * do not need a read_barrier_depends() here to force ordering
-	 * on Alpha, since the ipc_ids.sem is held.
+	 * do not need a rcu_dereference() here to force ordering
+	 * on Alpha, since the ipc_ids.mutex is held.
 	 */
-	p = ids->entries[lid].p;
-	ids->entries[lid].p = NULL;
-	if(p==NULL)
-		BUG();
+	p = ids->entries->p[lid];
+	ids->entries->p[lid] = NULL;
+	BUG_ON(p==NULL);
 	ids->in_use--;

 	if (lid == ids->max_id) {
@@ -238,7 +286,7 @@ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
 			lid--;
 			if(lid == -1)
 				break;
-		} while (ids->entries[lid].p == NULL);
+		} while (ids->entries->p[lid] == NULL);
 		ids->max_id = lid;
 	}
 	p->deleted = 1;
@@ -280,25 +328,47 @@ void ipc_free(void* ptr, int size)
 	kfree(ptr);
 }

-struct ipc_rcu_kmalloc
+/*
+ * rcu allocations:
+ * There are three headers that are prepended to the actual allocation:
+ * - during use: ipc_rcu_hdr.
+ * - during the rcu grace period: ipc_rcu_grace.
+ * - [only if vmalloc]: ipc_rcu_sched.
+ * Their lifetime doesn't overlap, thus the headers share the same memory.
+ * Unlike a normal union, they are right-aligned, thus some container_of
+ * forward/backward casting is necessary:
+ */
+struct ipc_rcu_hdr
+{
+	int refcount;
+	int is_vmalloc;
+	void *data[0];
+};
+
+
+struct ipc_rcu_grace
 {
 	struct rcu_head rcu;
 	/* "void *" makes sure alignment of following data is sane. */
 	void *data[0];
 };

-struct ipc_rcu_vmalloc
+struct ipc_rcu_sched
 {
-	struct rcu_head rcu;
 	struct work_struct work;
 	/* "void *" makes sure alignment of following data is sane. */
 	void *data[0];
 };

+#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
+			sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
+#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
+			sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
+
 static inline int rcu_use_vmalloc(int size)
 {
 	/* Too big for a single page? */
-	if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE)
+	if (HDRLEN_KMALLOC + size > PAGE_SIZE)
 		return 1;
 	return 0;
 }
@@ -320,43 +390,72 @@ void* ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc).
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
-		if (out) out += sizeof(struct ipc_rcu_vmalloc);
+		out = vmalloc(HDRLEN_VMALLOC + size);
+		if (out) {
+			out += HDRLEN_VMALLOC;
+			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
+			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+		}
 	} else {
-		out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
-		if (out) out += sizeof(struct ipc_rcu_kmalloc);
+		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		if (out) {
+			out += HDRLEN_KMALLOC;
+			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
+			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+		}
 	}

 	return out;
 }

+void ipc_rcu_getref(void *ptr)
+{
+	container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+}
+
 /**
- * ipc_schedule_free - free ipc + rcu space
+ * ipc_schedule_free - free ipc + rcu space
+ * @head: RCU callback structure for queued work
 *
 * Since RCU callback function is called in bh,
 * we need to defer the vfree to schedule_work
 */
-static void ipc_schedule_free(void* arg)
+static void ipc_schedule_free(struct rcu_head *head)
 {
-	struct ipc_rcu_vmalloc *free = arg;
+	struct ipc_rcu_grace *grace =
+		container_of(head, struct ipc_rcu_grace, rcu);
+	struct ipc_rcu_sched *sched =
+		container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+
+	INIT_WORK(&sched->work, vfree, sched);
+	schedule_work(&sched->work);
+}

-	INIT_WORK(&free->work, vfree, free);
-	schedule_work(&free->work);
+/**
+ * ipc_immediate_free - free ipc + rcu space
+ * @head: RCU callback structure that contains pointer to be freed
+ *
+ * Free from the RCU callback context
+ */
+static void ipc_immediate_free(struct rcu_head *head)
+{
+	struct ipc_rcu_grace *free =
+		container_of(head, struct ipc_rcu_grace, rcu);
+	kfree(free);
 }

-void ipc_rcu_free(void* ptr, int size)
+void ipc_rcu_putref(void *ptr)
 {
-	if (rcu_use_vmalloc(size)) {
-		struct ipc_rcu_vmalloc *free;
-		free = ptr - sizeof(*free);
-		call_rcu(&free->rcu, ipc_schedule_free, free);
+	if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+		return;
+
+	if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
+		call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+				ipc_schedule_free);
 	} else {
-		struct ipc_rcu_kmalloc *free;
-		free = ptr - sizeof(*free);
-		/* kfree takes a "const void *" so gcc warns. So we cast. */
-		call_rcu(&free->rcu, (void (*)(void *))kfree, free);
+		call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+				ipc_immediate_free);
 	}
-
 }

 /**
@@ -370,7 +469,10 @@ int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {	/* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
-	int requested_mode, granted_mode;
+	int requested_mode, granted_mode, err;
+
+	if (unlikely((err = audit_ipc_obj(ipcp))))
+		return err;

 	if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
 		return -1;
@@ -436,22 +538,22 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)

 /*
 * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.sem locked. Since grow_ary() is also called with
- * shm_ids.sem down(for Shared Memory), there is no need to add read
+ * is called with shm_ids.mutex locked. Since grow_ary() is also called with
+ * shm_ids.mutex down (for Shared Memory), there is no need to add read
 * barriers here to guarantee the writes in grow_ary() are seen in order
 * here (for Alpha).
 *
- * However ipc_get() itself does not necessary require ipc_ids.sem down. So
- * if in the future ipc_get() is used by other places without ipc_ids.sem
+ * However ipc_get() itself does not necessarily require ipc_ids.mutex down. So
+ * if in the future ipc_get() is used by other places without ipc_ids.mutex
 * down, then ipc_get() needs read memory barriers as ipc_lock() does.
 */
 struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
+	if(lid >= ids->entries->size)
 		return NULL;
-	out = ids->entries[lid].p;
+	out = ids->entries->p[lid];
 	return out;
 }

@@ -459,26 +561,15 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id* entries;
+	struct ipc_id_ary* entries;

 	rcu_read_lock();
-	if(lid >= ids->size) {
+	entries = rcu_dereference(ids->entries);
+	if(lid >= entries->size) {
 		rcu_read_unlock();
 		return NULL;
 	}
-
-	/*
-	 * Note: The following two read barriers are corresponding
-	 * to the two write barriers in grow_ary(). They guarantee
-	 * the writes are seen in the same order on the read side.
-	 * smp_rmb() has effect on all CPUs. read_barrier_depends()
-	 * is used if there are data dependency between two reads, and
-	 * has effect only on Alpha.
-	 */
-	smp_rmb(); /* prevent indexing old array with new size */
-	entries = ids->entries;
-	read_barrier_depends(); /*prevent seeing new array unitialized */
-	out = entries[lid].p;
+	out = entries->p[lid];
 	if(out == NULL) {
 		rcu_read_unlock();
 		return NULL;
@@ -496,6 +587,12 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 	return out;
 }

+void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+	rcu_read_lock();
+	spin_lock(&perm->lock);
+}
+
 void ipc_unlock(struct kern_ipc_perm* perm)
 {
 	spin_unlock(&perm->lock);
@@ -537,3 +634,113 @@ int ipc_parse_version (int *cmd)
 }

 #endif /* __ARCH_WANT_IPC_PARSE_VERSION */
+
+#ifdef CONFIG_PROC_FS
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+	struct ipc_proc_iface *iface = s->private;
+	struct kern_ipc_perm *ipc = it;
+	loff_t p;
+
+	/* If we had an ipc id locked before, unlock it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	/*
+	 * p = *pos - 1 (because id 0 starts at position 1)
+	 *          + 1 (because we increment the position by one)
+	 */
+	for (p = *pos; p <= iface->ids->max_id; p++) {
+		if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+			*pos = p + 1;
+			return ipc;
+		}
+	}
+
+	/* Out of range - return NULL to terminate iteration */
+	return NULL;
+}
+
+/*
+ * File positions: pos 0 -> header, pos n -> ipc id + 1.
+ * SeqFile iterator: iterator value locked shp or SEQ_START_TOKEN.
+ */
+static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
+{
+	struct ipc_proc_iface *iface = s->private;
+	struct kern_ipc_perm *ipc;
+	loff_t p;
+
+	/*
+	 * Take the lock - this will be released by the corresponding
+	 * call to stop().
+ */ + mutex_lock(&iface->ids->mutex); + + /* pos < 0 is invalid */ + if (*pos < 0) + return NULL; + + /* pos == 0 means header */ + if (*pos == 0) + return SEQ_START_TOKEN; + + /* Find the (pos-1)th ipc */ + for (p = *pos - 1; p <= iface->ids->max_id; p++) { + if ((ipc = ipc_lock(iface->ids, p)) != NULL) { + *pos = p + 1; + return ipc; + } + } + return NULL; +} + +static void sysvipc_proc_stop(struct seq_file *s, void *it) +{ + struct kern_ipc_perm *ipc = it; + struct ipc_proc_iface *iface = s->private; + + /* If we had a locked segment, release it */ + if (ipc && ipc != SEQ_START_TOKEN) + ipc_unlock(ipc); + + /* Release the lock we took in start() */ + mutex_unlock(&iface->ids->mutex); +} + +static int sysvipc_proc_show(struct seq_file *s, void *it) +{ + struct ipc_proc_iface *iface = s->private; + + if (it == SEQ_START_TOKEN) + return seq_puts(s, iface->header); + + return iface->show(s, it); +} + +static struct seq_operations sysvipc_proc_seqops = { + .start = sysvipc_proc_start, + .stop = sysvipc_proc_stop, + .next = sysvipc_proc_next, + .show = sysvipc_proc_show, +}; + +static int sysvipc_proc_open(struct inode *inode, struct file *file) { + int ret; + struct seq_file *seq; + + ret = seq_open(file, &sysvipc_proc_seqops); + if (!ret) { + seq = file->private_data; + seq->private = PDE(inode)->data; + } + return ret; +} + +static struct file_operations sysvipc_proc_fops = { + .open = sysvipc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* CONFIG_PROC_FS */