#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <asm/unistd.h>
+
#include "util.h"
/**
if(size > IPCMNI)
size = IPCMNI;
- ids->size = size;
ids->in_use = 0;
ids->max_id = -1;
ids->seq = 0;
ids->seq_max = seq_limit;
}
- ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size);
+ ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
+ sizeof(struct ipc_id_ary));
if(ids->entries == NULL) {
printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
- ids->size = 0;
+ size = 0;
+ ids->entries = &ids->nullentry;
}
- for(i=0;i<ids->size;i++)
- ids->entries[i].p = NULL;
+ ids->entries->size = size;
+ for(i=0;i<size;i++)
+ ids->entries->p[i] = NULL;
}
/**
int max_id = ids->max_id;
/*
- * read_barrier_depends is not needed here
+ * rcu_dereference() is not needed here
* since ipc_ids.sem is held
*/
for (id = 0; id <= max_id; id++) {
- p = ids->entries[id].p;
+ p = ids->entries->p[id];
if (p==NULL)
continue;
if (!vx_check(p->xid, VX_IDENT))
- continue;
+ continue;
if (key == p->key)
return id;
}
*/
static int grow_ary(struct ipc_ids* ids, int newsize)
{
- struct ipc_id* new;
- struct ipc_id* old;
+ struct ipc_id_ary* new;
+ struct ipc_id_ary* old;
int i;
+ int size = ids->entries->size;
if(newsize > IPCMNI)
newsize = IPCMNI;
- if(newsize <= ids->size)
+ if(newsize <= size)
return newsize;
- new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize);
+ new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+ sizeof(struct ipc_id_ary));
if(new == NULL)
- return ids->size;
- memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size);
- for(i=ids->size;i<newsize;i++) {
- new[i].p = NULL;
+ return size;
+ new->size = newsize;
+ memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size +
+ sizeof(struct ipc_id_ary));
+ for(i=size;i<newsize;i++) {
+ new->p[i] = NULL;
}
old = ids->entries;
- i = ids->size;
/*
- * before setting the ids->entries to the new array, there must be a
- * smp_wmb() to make sure the memcpyed contents of the new array are
- * visible before the new array becomes visible.
+ * Use rcu_assign_pointer() to make sure the memcpyed contents
+ * of the new array are visible before the new array becomes visible.
*/
- smp_wmb(); /* prevent seeing new array uninitialized. */
- ids->entries = new;
- smp_wmb(); /* prevent indexing into old array based on new size. */
- ids->size = newsize;
+ rcu_assign_pointer(ids->entries, new);
- ipc_rcu_free(old, sizeof(struct ipc_id)*i);
- return ids->size;
+ ipc_rcu_putref(old);
+ return newsize;
}
/**
size = grow_ary(ids,size);
/*
- * read_barrier_depends() is not needed here since
+	 * rcu_dereference() is not needed here since
* ipc_ids.sem is held
*/
for (id = 0; id < size; id++) {
- if(ids->entries[id].p == NULL)
+ if(ids->entries->p[id] == NULL)
goto found;
}
return -1;
if(ids->seq > ids->seq_max)
ids->seq = 0;
- new->lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&new->lock);
new->deleted = 0;
rcu_read_lock();
spin_lock(&new->lock);
- ids->entries[id].p = new;
+ ids->entries->p[id] = new;
return id;
}
{
struct kern_ipc_perm* p;
int lid = id % SEQ_MULTIPLIER;
- if(lid >= ids->size)
+ if(lid >= ids->entries->size)
BUG();
/*
- * do not need a read_barrier_depends() here to force ordering
+	 * do not need a rcu_dereference() here to force ordering
* on Alpha, since the ipc_ids.sem is held.
*/
- p = ids->entries[lid].p;
- ids->entries[lid].p = NULL;
+ p = ids->entries->p[lid];
+ ids->entries->p[lid] = NULL;
if(p==NULL)
BUG();
ids->in_use--;
lid--;
if(lid == -1)
break;
- } while (ids->entries[lid].p == NULL);
+ } while (ids->entries->p[lid] == NULL);
ids->max_id = lid;
}
p->deleted = 1;
kfree(ptr);
}
-struct ipc_rcu_kmalloc
+/*
+ * rcu allocations:
+ * There are three headers that are prepended to the actual allocation:
+ * - during use: ipc_rcu_hdr.
+ * - during the rcu grace period: ipc_rcu_grace.
+ * - [only if vmalloc]: ipc_rcu_sched.
+ * Their lifetime doesn't overlap, thus the headers share the same memory.
+ * Unlike a normal union, they are right-aligned, thus some container_of
+ * forward/backward casting is necessary:
+ */
+struct ipc_rcu_hdr
+{
+ int refcount;
+ int is_vmalloc;
+ void *data[0];
+};
+
+
+struct ipc_rcu_grace
{
struct rcu_head rcu;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
-struct ipc_rcu_vmalloc
+struct ipc_rcu_sched
{
- struct rcu_head rcu;
struct work_struct work;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
+#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
+ sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
+#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
+ sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
+
static inline int rcu_use_vmalloc(int size)
{
/* Too big for a single page? */
- if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE)
+ if (HDRLEN_KMALLOC + size > PAGE_SIZE)
return 1;
return 0;
}
* workqueue if necessary (for vmalloc).
*/
if (rcu_use_vmalloc(size)) {
- out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
- if (out) out += sizeof(struct ipc_rcu_vmalloc);
+ out = vmalloc(HDRLEN_VMALLOC + size);
+ if (out) {
+ out += HDRLEN_VMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
+ container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+ }
} else {
- out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
- if (out) out += sizeof(struct ipc_rcu_kmalloc);
+ out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+ if (out) {
+ out += HDRLEN_KMALLOC;
+ container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
+ container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
+ }
}
return out;
}
+void ipc_rcu_getref(void *ptr)
+{
+ container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+}
+
/**
* ipc_schedule_free - free ipc + rcu space
*
* Since RCU callback function is called in bh,
* we need to defer the vfree to schedule_work
*/
-static void ipc_schedule_free(void* arg)
+static void ipc_schedule_free(struct rcu_head *head)
{
- struct ipc_rcu_vmalloc *free = arg;
+ struct ipc_rcu_grace *grace =
+ container_of(head, struct ipc_rcu_grace, rcu);
+ struct ipc_rcu_sched *sched =
+ container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+
+ INIT_WORK(&sched->work, vfree, sched);
+ schedule_work(&sched->work);
+}
- INIT_WORK(&free->work, vfree, free);
- schedule_work(&free->work);
+/**
+ * ipc_immediate_free - free ipc + rcu space
+ *
+ * Free from the RCU callback context
+ *
+ */
+static void ipc_immediate_free(struct rcu_head *head)
+{
+ struct ipc_rcu_grace *free =
+ container_of(head, struct ipc_rcu_grace, rcu);
+ kfree(free);
}
-void ipc_rcu_free(void* ptr, int size)
+void ipc_rcu_putref(void *ptr)
{
- if (rcu_use_vmalloc(size)) {
- struct ipc_rcu_vmalloc *free;
- free = ptr - sizeof(*free);
- call_rcu(&free->rcu, ipc_schedule_free, free);
+ if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+ return;
+
+ if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
+ call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+ ipc_schedule_free);
} else {
- struct ipc_rcu_kmalloc *free;
- free = ptr - sizeof(*free);
- /* kfree takes a "const void *" so gcc warns. So we cast. */
- call_rcu(&free->rcu, (void (*)(void *))kfree, free);
+ call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
+ ipc_immediate_free);
}
-
}
/**
{
struct kern_ipc_perm* out;
int lid = id % SEQ_MULTIPLIER;
- if(lid >= ids->size)
+ if(lid >= ids->entries->size)
return NULL;
- out = ids->entries[lid].p;
+ out = ids->entries->p[lid];
return out;
}
{
struct kern_ipc_perm* out;
int lid = id % SEQ_MULTIPLIER;
- struct ipc_id* entries;
+ struct ipc_id_ary* entries;
rcu_read_lock();
- if(lid >= ids->size) {
+ entries = rcu_dereference(ids->entries);
+ if(lid >= entries->size) {
rcu_read_unlock();
return NULL;
}
-
- /*
- * Note: The following two read barriers are corresponding
- * to the two write barriers in grow_ary(). They guarantee
- * the writes are seen in the same order on the read side.
- * smp_rmb() has effect on all CPUs. read_barrier_depends()
- * is used if there are data dependency between two reads, and
- * has effect only on Alpha.
- */
- smp_rmb(); /* prevent indexing old array with new size */
- entries = ids->entries;
- read_barrier_depends(); /*prevent seeing new array unitialized */
- out = entries[lid].p;
+ out = entries->p[lid];
if(out == NULL) {
rcu_read_unlock();
return NULL;
return out;
}
+void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+ rcu_read_lock();
+ spin_lock(&perm->lock);
+}
+
void ipc_unlock(struct kern_ipc_perm* perm)
{
spin_unlock(&perm->lock);
return 0;
}
-#if !defined(__ia64__) && !defined(__x86_64__) && !defined(__hppa__)
+#ifdef __ARCH_WANT_IPC_PARSE_VERSION
+
/**
* ipc_parse_version - IPC call version
}
}
-#endif /* __ia64__ */
+#endif /* __ARCH_WANT_IPC_PARSE_VERSION */