diff --git a/ipc/util.c b/ipc/util.c
index d84ac5198..37e59a988 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -7,12 +7,16 @@
  *            Occurs in several places in the IPC code.
  *            Chris Evans, <chris@ferret.lmh.ox.ac.uk>
  * Nov 1999 - ipc helper functions, unified SMP locking
- *	Manfred Spraul <manfred@colorfullife.com>
+ *	      Manfred Spraul <manfred@colorfullife.com>
  * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
  *            Mingming Cao <cmm@us.ibm.com>
+ * Mar 2006 - support for audit of ipc object properties
+ *            Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ * Jun 2006 - namespaces support
+ *            OpenVZ, SWsoft Inc.
+ *            Pavel Emelianov <xemul@openvz.org>
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/init.h>
@@ -20,16 +24,132 @@
 #include <linux/msg.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
+#include <linux/capability.h>
 #include <linux/slab.h>
 #include <linux/highuid.h>
 #include <linux/security.h>
 #include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/audit.h>
+#include <linux/nsproxy.h>
 #include <linux/workqueue.h>
+#include <linux/vserver/global.h>
 #include <asm/unistd.h>
 
 #include "util.h"
 
+struct ipc_proc_iface {
+	const char *path;
+	const char *header;
+	int ids;
+	int (*show)(struct seq_file *, void *);
+};
+
+struct ipc_namespace init_ipc_ns = {
+	.kref = {
+		.refcount	= ATOMIC_INIT(2),
+	},
+};
+
+#ifdef CONFIG_IPC_NS
+static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+{
+	int err;
+	struct ipc_namespace *ns;
+
+	err = -ENOMEM;
+	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
+	if (ns == NULL)
+		goto err_mem;
+
+	err = sem_init_ns(ns);
+	if (err)
+		goto err_sem;
+	err = msg_init_ns(ns);
+	if (err)
+		goto err_msg;
+	err = shm_init_ns(ns);
+	if (err)
+		goto err_shm;
+
+	kref_init(&ns->kref);
+	atomic_inc(&vs_global_ipc_ns);
+	return ns;
+
+err_shm:
+	msg_exit_ns(ns);
+err_msg:
+	sem_exit_ns(ns);
+err_sem:
+	kfree(ns);
+err_mem:
+	return ERR_PTR(err);
+}
+
+int unshare_ipcs(unsigned long unshare_flags, struct ipc_namespace **new_ipc)
+{
+	struct ipc_namespace *new;
+
+	if (unshare_flags & CLONE_NEWIPC) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		new = clone_ipc_ns(current->nsproxy->ipc_ns);
+		if (IS_ERR(new))
+			return PTR_ERR(new);
+
+		*new_ipc = new;
+	}
+
+	return 0;
+}
+
+int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+	struct ipc_namespace *old_ns = tsk->nsproxy->ipc_ns;
+	struct ipc_namespace *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+
+	get_ipc_ns(old_ns);
+
+	if (!(flags & CLONE_NEWIPC))
+		return 0;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	new_ns = clone_ipc_ns(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tsk->nsproxy->ipc_ns = new_ns;
+out:
+	put_ipc_ns(old_ns);
+	return err;
+}
+
+void free_ipc_ns(struct kref *kref)
+{
+	struct ipc_namespace *ns;
+
+	ns = container_of(kref, struct ipc_namespace, kref);
+	sem_exit_ns(ns);
+	msg_exit_ns(ns);
+	shm_exit_ns(ns);
+	atomic_dec(&vs_global_ipc_ns);
+	kfree(ns);
+}
+#endif
+
 /**
  *	ipc_init	-	initialise IPC subsystem
  *
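The namespace lifetime added above is a plain kref scheme: clone_ipc_ns() hands out a fresh namespace with a refcount of 1, copy_ipcs() pins the parent's namespace before deciding whether to clone, and free_ipc_ns() runs only when the last reference drops. init_ipc_ns starts at a refcount of 2, presumably so the boot namespace can never be freed. The get_ipc_ns()/put_ipc_ns() helpers that copy_ipcs() calls live outside this diff; a minimal sketch, assuming they are the usual thin kref wrappers of this era (not the exact header text):

static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
	if (ns)
		kref_get(&ns->kref);
	return ns;
}

static inline void put_ipc_ns(struct ipc_namespace *ns)
{
	/* free_ipc_ns() tears the namespace down on the last put */
	kref_put(&ns->kref, free_ipc_ns);
}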
@@ -56,14 +176,14 @@ __initcall(ipc_init);
  *	set up the sequence range to use then allocate and initialise the
  *	array itself.
  */
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
 {
 	int i;
-	sema_init(&ids->sem,1);
+
+	mutex_init(&ids->mutex);
 
 	if(size > IPCMNI)
 		size = IPCMNI;
-	ids->size = size;
 	ids->in_use = 0;
 	ids->max_id = -1;
 	ids->seq = 0;
@@ -75,22 +195,61 @@ void __init ipc_init_ids(struct ipc_ids* ids, int size)
 		ids->seq_max = seq_limit;
 	}
 
-	ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size);
+	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
+				sizeof(struct ipc_id_ary));
 
 	if(ids->entries == NULL) {
 		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		ids->size = 0;
+		size = 0;
+		ids->entries = &ids->nullentry;
+	}
+	ids->entries->size = size;
+	for(i=0;i<size;i++)
+		ids->entries->p[i] = NULL;
+}
+
+#ifdef CONFIG_PROC_FS
+static struct file_operations sysvipc_proc_fops;
+
+/**
+ *	ipc_init_proc_interface	- Create a proc interface for sysvipc types
+ *				  using a seq_file interface.
+ *	@path: Path in procfs
+ *	@header: Banner to be printed at the beginning of the file.
+ *	@ids: ipc id table to iterate.
+ *	@show: show routine.
+ */
+void __init ipc_init_proc_interface(const char *path, const char *header,
+		int ids, int (*show)(struct seq_file *, void *))
+{
+	struct proc_dir_entry *pde;
+	struct ipc_proc_iface *iface;
+
+	iface = kmalloc(sizeof(*iface), GFP_KERNEL);
+	if (!iface)
+		return;
+	iface->path	= path;
+	iface->header	= header;
+	iface->ids	= ids;
+	iface->show	= show;
+
+	pde = create_proc_entry(path,
+				S_IRUGO,	/* world readable */
+				NULL		/* parent dir */);
+	if (pde) {
+		pde->data = iface;
+		pde->proc_fops = &sysvipc_proc_fops;
+	} else {
+		kfree(iface);
 	}
-	for(i=0;i<ids->size;i++)
-		ids->entries[i].p = NULL;
 }
+#endif
 
 /**
  *	ipc_findkey	-	find a key in an ipc identifier set
  *	@ids: Identifier set
  *	@key: The key to find
  *
- *	Requires ipc_ids.sem locked.
+ *	Requires ipc_ids.mutex locked.
  *	Returns the identifier if found or -1 if not.
  */
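Each SysV IPC type is expected to register its /proc file through ipc_init_proc_interface() at boot. A hedged sketch of such a caller, modelled on the message-queue side: the header string and column layout here are illustrative, and IPC_MSG_IDS stands for the type's index into ipc_namespace->ids[] (both are assumptions, not quoted from this diff):

void __init msg_init(void)
{
	/* column banner printed once, then one line per queue */
	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms      cbytes       qnum\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
}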
@@ -101,15 +260,13 @@ int ipc_findkey(struct ipc_ids* ids, key_t key)
 	int max_id = ids->max_id;
 
 	/*
-	 * read_barrier_depends is not needed here
-	 * since ipc_ids.sem is held
+	 * rcu_dereference() is not needed here
+	 * since ipc_ids.mutex is held
 	 */
 	for (id = 0; id <= max_id; id++) {
-		p = ids->entries[id].p;
-		if (p==NULL)
+		p = ids->entries->p[id];
+		if(p==NULL)
 			continue;
-		if (!vx_check(p->xid, VX_IDENT))
-			continue;
 		if (key == p->key)
 			return id;
 	}
@@ -117,41 +274,39 @@ int ipc_findkey(struct ipc_ids* ids, key_t key)
 }
 
 /*
- * Requires ipc_ids.sem locked
+ * Requires ipc_ids.mutex locked
 */
 static int grow_ary(struct ipc_ids* ids, int newsize)
 {
-	struct ipc_id* new;
-	struct ipc_id* old;
+	struct ipc_id_ary* new;
+	struct ipc_id_ary* old;
 	int i;
+	int size = ids->entries->size;
 
 	if(newsize > IPCMNI)
 		newsize = IPCMNI;
-	if(newsize <= ids->size)
+	if(newsize <= size)
 		return newsize;
 
-	new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize);
+	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+			    sizeof(struct ipc_id_ary));
 	if(new == NULL)
-		return ids->size;
-	memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size);
-	for(i=ids->size;i<newsize;i++) {
-		new[i].p = NULL;
+		return size;
+	new->size = newsize;
+	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
+	for(i=size;i<newsize;i++) {
+		new->p[i] = NULL;
 	}
 	old = ids->entries;
-	i = ids->size;
 
 	/*
-	 * before setting the ids->entries to the new array, there must be a
-	 * smp_wmb() to make sure the memcpyed contents of the new array are
-	 * visible before the new array becomes visible.
+	 * Use rcu_assign_pointer() to make sure the memcpyed contents
+	 * of the new array are visible before the new array becomes visible.
	 */
-	smp_wmb();	/* prevent seeing new array uninitialized. */
-	ids->entries = new;
-	smp_wmb();	/* prevent indexing into old array based on new size. */
-	ids->size = newsize;
 
-	ipc_rcu_free(old, sizeof(struct ipc_id)*i);
-	return ids->size;
+	rcu_assign_pointer(ids->entries, new);
+
+	__ipc_fini_ids(ids, old);
+	return newsize;
 }
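grow_ary() now follows the canonical RCU publish/subscribe discipline: the writer, serialized by ids->mutex, fully initializes the new array before publishing it in a single pointer store, and readers subscribe to that pointer. Condensed to its essentials, the pairing looks like this (a kernel-style sketch, not a literal excerpt of this file):

/* writer side: holds ids->mutex, so only one grower at a time */
new->size = newsize;
memcpy(new->p, old->p, sizeof(struct kern_ipc_perm *) * old->size);
rcu_assign_pointer(ids->entries, new);	/* subsumes the old smp_wmb() pair */

/* reader side, e.g. ipc_lock() */
rcu_read_lock();
entries = rcu_dereference(ids->entries);	/* ordered read, even on Alpha */
if (lid < entries->size)
	p = entries->p[lid];
rcu_read_unlock();

Because size now lives inside the ipc_id_ary that is published atomically with the pointers, the old hazard of indexing the old array with the new size disappears, which is why both smp_wmb()/smp_rmb() pairs could be deleted.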
@@ -165,7 +320,7 @@ static int grow_ary(struct ipc_ids* ids, int newsize)
  *	is returned. The list is returned in a locked state on success.
  *	On failure the list is not locked and -1 is returned.
  *
- *	Called with ipc_ids.sem held.
+ *	Called with ipc_ids.mutex held.
  */
 
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
@@ -175,11 +330,11 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	size = grow_ary(ids,size);
 
 	/*
-	 * read_barrier_depends() is not needed here since
-	 * ipc_ids.sem is held
+	 * rcu_dereference() is not needed here since
+	 * ipc_ids.mutex is held
 	 */
 	for (id = 0; id < size; id++) {
-		if(ids->entries[id].p == NULL)
+		if(ids->entries->p[id] == NULL)
 			goto found;
 	}
 	return -1;
@@ -195,11 +350,11 @@ found:
 	if(ids->seq > ids->seq_max)
 		ids->seq = 0;
 
-	new->lock = SPIN_LOCK_UNLOCKED;
+	spin_lock_init(&new->lock);
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries[id].p = new;
+	ids->entries->p[id] = new;
 	return id;
 }
@@ -212,7 +367,7 @@ found:
  *	fed an invalid identifier. The entry is removed and internal
  *	variables recomputed. The object associated with the identifier
  *	is returned.
- *	ipc_ids.sem and the spinlock for this ID is hold before this function
+ *	ipc_ids.mutex and the spinlock for this ID are held before this function
  *	is called, and remain locked on exit.
  */
 
@@ -220,17 +375,15 @@ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* p;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
-		BUG();
+	BUG_ON(lid >= ids->entries->size);
 
 	/*
-	 * do not need a read_barrier_depends() here to force ordering
-	 * on Alpha, since the ipc_ids.sem is held.
+	 * do not need a rcu_dereference() here to force ordering
+	 * on Alpha, since the ipc_ids.mutex is held.
 	 */
-	p = ids->entries[lid].p;
-	ids->entries[lid].p = NULL;
-	if(p==NULL)
-		BUG();
+	p = ids->entries->p[lid];
+	ids->entries->p[lid] = NULL;
+	BUG_ON(p==NULL);
 	ids->in_use--;
 
 	if (lid == ids->max_id) {
@@ -238,7 +391,7 @@ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
 			lid--;
 			if(lid == -1)
 				break;
-		} while (ids->entries[lid].p == NULL);
+		} while (ids->entries->p[lid] == NULL);
 		ids->max_id = lid;
 	}
 	p->deleted = 1;
@@ -280,25 +433,47 @@ void ipc_free(void* ptr, int size)
 		kfree(ptr);
 }
 
-struct ipc_rcu_kmalloc
+/*
+ * rcu allocations:
+ * There are three headers that are prepended to the actual allocation:
+ * - during use: ipc_rcu_hdr.
+ * - during the rcu grace period: ipc_rcu_grace.
+ * - [only if vmalloc]: ipc_rcu_sched.
+ * Their lifetime doesn't overlap, thus the headers share the same memory.
+ * Unlike a normal union, they are right-aligned, thus some container_of
+ * forward/backward casting is necessary:
+ */
+struct ipc_rcu_hdr
+{
+	int refcount;
+	int is_vmalloc;
+	void *data[0];
+};
+
+
+struct ipc_rcu_grace
 {
 	struct rcu_head rcu;
 	/* "void *" makes sure alignment of following data is sane. */
 	void *data[0];
 };
 
-struct ipc_rcu_vmalloc
+struct ipc_rcu_sched
 {
-	struct rcu_head rcu;
 	struct work_struct work;
 	/* "void *" makes sure alignment of following data is sane. */
 	void *data[0];
 };
 
+#define HDRLEN_KMALLOC		(sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
+					sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
+#define HDRLEN_VMALLOC		(sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
+					sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
+
 static inline int rcu_use_vmalloc(int size)
 {
 	/* Too big for a single page? */
-	if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE)
+	if (HDRLEN_KMALLOC + size > PAGE_SIZE)
 		return 1;
 	return 0;
 }
@@ -320,58 +495,77 @@ void* ipc_rcu_alloc(int size)
 	 * workqueue if necessary (for vmalloc).
 	 */
 	if (rcu_use_vmalloc(size)) {
-		out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
-		if (out) out += sizeof(struct ipc_rcu_vmalloc);
+		out = vmalloc(HDRLEN_VMALLOC + size);
+		if (out) {
+			out += HDRLEN_VMALLOC;
+			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
+			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+		}
 	} else {
-		out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
-		if (out) out += sizeof(struct ipc_rcu_kmalloc);
+		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		if (out) {
+			out += HDRLEN_KMALLOC;
+			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
+			container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+		}
 	}
 
 	return out;
 }
 
+void ipc_rcu_getref(void *ptr)
+{
+	container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+}
+
+static void ipc_do_vfree(struct work_struct *work)
+{
+	vfree(container_of(work, struct ipc_rcu_sched, work));
+}
+
 /**
- * ipc_schedule_free - free ipc + rcu space
+ * ipc_schedule_free - free ipc + rcu space
+ * @head: RCU callback structure for queued work
  *
  * Since RCU callback function is called in bh,
  * we need to defer the vfree to schedule_work
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-	struct ipc_rcu_vmalloc *free =
-		container_of(head, struct ipc_rcu_vmalloc, rcu);
+	struct ipc_rcu_grace *grace =
+		container_of(head, struct ipc_rcu_grace, rcu);
+	struct ipc_rcu_sched *sched =
+		container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
 
-	INIT_WORK(&free->work, vfree, free);
-	schedule_work(&free->work);
+	INIT_WORK(&sched->work, ipc_do_vfree);
+	schedule_work(&sched->work);
 }
 
 /**
- * ipc_immediate_free - free ipc + rcu space
- *
- * Free from the RCU callback context
+ * ipc_immediate_free - free ipc + rcu space
+ * @head: RCU callback structure that contains pointer to be freed
  *
+ * Free from the RCU callback context
  */
 static void ipc_immediate_free(struct rcu_head *head)
 {
-	struct ipc_rcu_kmalloc *free =
-		container_of(head, struct ipc_rcu_kmalloc, rcu);
+	struct ipc_rcu_grace *free =
+		container_of(head, struct ipc_rcu_grace, rcu);
 	kfree(free);
 }
 
-
-
-void ipc_rcu_free(void* ptr, int size)
+void ipc_rcu_putref(void *ptr)
 {
-	if (rcu_use_vmalloc(size)) {
-		struct ipc_rcu_vmalloc *free;
-		free = ptr - sizeof(*free);
-		call_rcu(&free->rcu, ipc_schedule_free);
+	if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+		return;
+
+	if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
+		call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+				ipc_schedule_free);
 	} else {
-		struct ipc_rcu_kmalloc *free;
-		free = ptr - sizeof(*free);
-		call_rcu(&free->rcu, ipc_immediate_free);
+		call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+				ipc_immediate_free);
 	}
-
 }
 
 /**
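The free paths above quietly depend on the "right-aligned" header trick: kfree()/vfree() are handed the ipc_rcu_grace/ipc_rcu_sched pointer, which coincides with the allocation base only while that header is the largest of the shared ones, as it is on common configurations (a struct rcu_head, and even more so a struct work_struct, outgrows two ints). A sketch of the kmalloc layout, plus a hypothetical compile-time check encoding that assumption (the check is not in the patch):

/*
 *  base = ptr - HDRLEN_KMALLOC           ptr = value ipc_rcu_alloc() returns
 *  |<-------- HDRLEN_KMALLOC ---------->|<-------- size bytes -------->|
 *  | struct ipc_rcu_grace (rcu head)    |  caller's object             |
 *  |     ...pad | struct ipc_rcu_hdr    |                              |
 *                refcount, is_vmalloc --^  every header ends at ptr
 *
 * container_of(ptr, struct ipc_rcu_hdr, data) finds the live header;
 * container_of(ptr, struct ipc_rcu_grace, data) finds the rcu head.
 */
static inline void ipc_rcu_layout_check(void)
{
	/* hypothetical: kfree()/vfree() in the callbacks get the
	 * allocation base back only if these hold */
	BUILD_BUG_ON(sizeof(struct ipc_rcu_grace) != HDRLEN_KMALLOC);
	BUILD_BUG_ON(sizeof(struct ipc_rcu_sched) != HDRLEN_VMALLOC);
}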
@@ -385,10 +579,10 @@ void ipc_rcu_free(void* ptr, int size)
 
 int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {	/* flag will most probably be 0 or S_...UGO from <linux/ipc.h> */
-	int requested_mode, granted_mode;
+	int requested_mode, granted_mode, err;
 
-	if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
-		return -1;
+	if (unlikely((err = audit_ipc_obj(ipcp))))
+		return err;
 	requested_mode = (flag >> 6) | (flag >> 3) | flag;
 	granted_mode = ipcp->mode;
 	if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
@@ -451,22 +645,22 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 
 /*
  * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.sem locked.  Since grow_ary() is also called with
- * shm_ids.sem down(for Shared Memory), there is no need to add read
+ * is called with shm_ids.mutex locked.  Since grow_ary() is also called with
+ * shm_ids.mutex down (for Shared Memory), there is no need to add read
 * barriers here to guarantee the writes in grow_ary() are seen in order
 * here (for Alpha).
 *
- * However ipc_get() itself does not necessary require ipc_ids.sem down. So
- * if in the future ipc_get() is used by other places without ipc_ids.sem
+ * However ipc_get() itself does not necessarily require ipc_ids.mutex down. So
+ * if in the future ipc_get() is used by other places without ipc_ids.mutex
 * down, then ipc_get() needs read memory barriers as ipc_lock() does.
 */
 struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->size)
+	if(lid >= ids->entries->size)
 		return NULL;
-	out = ids->entries[lid].p;
+	out = ids->entries->p[lid];
 	return out;
 }
 
@@ -474,26 +668,15 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 {
 	struct kern_ipc_perm* out;
 	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id* entries;
+	struct ipc_id_ary* entries;
 
 	rcu_read_lock();
-	if(lid >= ids->size) {
+	entries = rcu_dereference(ids->entries);
+	if(lid >= entries->size) {
 		rcu_read_unlock();
 		return NULL;
 	}
-
-	/*
-	 * Note: The following two read barriers are corresponding
-	 * to the two write barriers in grow_ary(). They guarantee
-	 * the writes are seen in the same order on the read side.
-	 * smp_rmb() has effect on all CPUs.  read_barrier_depends()
-	 * is used if there are data dependency between two reads, and
-	 * has effect only on Alpha.
-	 */
-	smp_rmb(); /* prevent indexing old array with new size */
-	entries = ids->entries;
-	read_barrier_depends(); /*prevent seeing new array unitialized */
-	out = entries[lid].p;
+	out = entries->p[lid];
 	if(out == NULL) {
 		rcu_read_unlock();
 		return NULL;
 	}
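ipc_lock() returns with both rcu_read_lock() and the per-ID spinlock held; the re-check of the deleted flag, which closes the race with a concurrent ipc_rmid(), sits in unchanged context omitted from the hunk above. The complete lookup pattern, consolidated into one hedged sketch (names follow this file, but this is a reconstruction, not a literal excerpt):

struct kern_ipc_perm *ipc_lock_sketch(struct ipc_ids *ids, int id)
{
	struct ipc_id_ary *entries;
	struct kern_ipc_perm *out;
	int lid = id % SEQ_MULTIPLIER;

	rcu_read_lock();
	entries = rcu_dereference(ids->entries);
	if (lid >= entries->size)
		goto fail;
	out = entries->p[lid];
	if (out == NULL)
		goto fail;
	spin_lock(&out->lock);
	if (out->deleted) {		/* lost the race with ipc_rmid() */
		spin_unlock(&out->lock);
		goto fail;
	}
	return out;			/* rcu read lock + spinlock held */
fail:
	rcu_read_unlock();
	return NULL;
}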
@@ -511,6 +694,12 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 	return out;
 }
 
+void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+	rcu_read_lock();
+	spin_lock(&perm->lock);
+}
+
 void ipc_unlock(struct kern_ipc_perm* perm)
 {
 	spin_unlock(&perm->lock);
@@ -552,3 +741,121 @@ int ipc_parse_version (int *cmd)
 }
 
 #endif /* __ARCH_WANT_IPC_PARSE_VERSION */
+
+#ifdef CONFIG_PROC_FS
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+	struct ipc_proc_iface *iface = s->private;
+	struct kern_ipc_perm *ipc = it;
+	loff_t p;
+	struct ipc_ids *ids;
+
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
+
+	/* If we had an ipc id locked before, unlock it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	/*
+	 * p = *pos - 1 (because id 0 starts at position 1)
+	 *          + 1 (because we increment the position by one)
+	 */
+	for (p = *pos; p <= ids->max_id; p++) {
+		if ((ipc = ipc_lock(ids, p)) != NULL) {
+			*pos = p + 1;
+			return ipc;
+		}
+	}
+
+	/* Out of range - return NULL to terminate iteration */
+	return NULL;
+}
+
+/*
+ * File positions: pos 0 -> header, pos n -> ipc id + 1.
+ * SeqFile iterator: iterator value is a locked ipc pointer or SEQ_START_TOKEN.
+ */
+static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
+{
+	struct ipc_proc_iface *iface = s->private;
+	struct kern_ipc_perm *ipc;
+	loff_t p;
+	struct ipc_ids *ids;
+
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
+
+	/*
+	 * Take the lock - this will be released by the corresponding
+	 * call to stop().
+	 */
+	mutex_lock(&ids->mutex);
+
+	/* pos < 0 is invalid */
+	if (*pos < 0)
+		return NULL;
+
+	/* pos == 0 means header */
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	/* Find the (pos-1)th ipc */
+	for (p = *pos - 1; p <= ids->max_id; p++) {
+		if ((ipc = ipc_lock(ids, p)) != NULL) {
+			*pos = p + 1;
+			return ipc;
+		}
+	}
+	return NULL;
+}
+
+static void sysvipc_proc_stop(struct seq_file *s, void *it)
+{
+	struct kern_ipc_perm *ipc = it;
+	struct ipc_proc_iface *iface = s->private;
+	struct ipc_ids *ids;
+
+	/* If we had a locked segment, release it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	ids = current->nsproxy->ipc_ns->ids[iface->ids];
+	/* Release the lock we took in start() */
+	mutex_unlock(&ids->mutex);
+}
+
+static int sysvipc_proc_show(struct seq_file *s, void *it)
+{
+	struct ipc_proc_iface *iface = s->private;
+
+	if (it == SEQ_START_TOKEN)
+		return seq_puts(s, iface->header);
+
+	return iface->show(s, it);
+}
+
+static struct seq_operations sysvipc_proc_seqops = {
+	.start = sysvipc_proc_start,
+	.stop  = sysvipc_proc_stop,
+	.next  = sysvipc_proc_next,
+	.show  = sysvipc_proc_show,
+};
+
+static int sysvipc_proc_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	struct seq_file *seq;
+
+	ret = seq_open(file, &sysvipc_proc_seqops);
+	if (!ret) {
+		seq = file->private_data;
+		seq->private = PDE(inode)->data;
+	}
+	return ret;
+}
+
+static struct file_operations sysvipc_proc_fops = {
+	.open	 = sysvipc_proc_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+#endif /* CONFIG_PROC_FS */
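The seq_file iterator keeps each object spin-locked while iface->show() runs (start()/next() return locked entries, stop() unlocks the last one), so a show routine may dereference the object without taking further locks. A minimal illustrative callback of the shape iface->show expects; the field choice and output format here are made up for the example, not the real msg/sem/shm layout:

static int sysvipc_demo_proc_show(struct seq_file *s, void *it)
{
	struct kern_ipc_perm *perm = it;	/* already locked by the iterator */

	/* one line per IPC id; columns are illustrative only */
	return seq_printf(s, "%10d %10o\n", perm->key, perm->mode);
}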