Moved vduimpl.c from util-python, import common pybuild infrastructure
authorSteve Muir <smuir@cs.princeton.edu>
Mon, 20 Jun 2005 19:59:42 +0000 (19:59 +0000)
committerSteve Muir <smuir@cs.princeton.edu>
Mon, 20 Jun 2005 19:59:42 +0000 (19:59 +0000)
python/Makefile
python/pybuild.mk [new file with mode: 0644]
python/setup.py
python/vduimpl.c [new file with mode: 0644]
util-vserver.spec

index 2c8d28b..32d624a 100644 (file)
@@ -1,21 +1,15 @@
-INCLUDES := -I.. -I../lib -I/usr/include/python2.3
-CPPFLAGS = $(DEFS) $(INCLUDES)
-CFLAGS = $(CPPFLAGS) -g -Wall
-COMPILE = $(CC) $(CFLAGS)
+INCLUDES := -I.. -I../lib
 LIBS = -L../lib -lvserver
-LINK = $(CC) $(LDFLAGS)
 
-NATIVE_MODS := vserverimpl.so
+PY_MODS := vserver.py
+PY_EXT_MODS := vduimpl.so vserverimpl.so
 
-all: $(NATIVE_MODS)
+all: py-build
 
-$(NATIVE_MODS): %.so: %.o
-       $(LINK) -shared -o $@ $^ $(LIBS)
-
-%.o: %.c
-       $(COMPILE) -c $<
-
-install:
-       python setup.py install --root=$(INSTALL_ROOT)
+install: py-install
        ln -s /usr/lib/util-vserver/util-vserver-vars \
        $(INSTALL_ROOT)/usr/lib/python2.3/site-packages/util_vserver_vars.py
+
+-include pybuild.mk
+
+.PHONY: all install
diff --git a/python/pybuild.mk b/python/pybuild.mk
new file mode 100644 (file)
index 0000000..140cc14
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright 2005 Princeton University
+#
+# PY_MODS variable should be list of Python source modules
+# PY_EXT_MODS should be list of Python extension modules (.so) that need
+# to be built
+#
+
+_PY_TMPDIR := .pybuild
+
+# use strip to remove extra whitespace
+_PY_SETUP = $(strip python setup.py \
+       $(foreach i,DEFS INCLUDES LIBS PY_MODS PY_EXT_MODS,\
+       $(if $(subst undefined,,$(origin $i)),$i="$($i)")))
+
+_PY_BUILD_ARGS := -t $(_PY_TMPDIR) -b $(_PY_TMPDIR)/build
+
+py-build: $(_PY_TMPDIR)/.prep-done $(PY_MODS) $(PY_EXT_MODS)
+
+# One-time scratch setup: create the build directories and link
+# $(_PY_TMPDIR)/build to ../build, then drop a stamp file so the rule
+# is not re-run.  Every step is idempotent, so a run that was
+# interrupted before the stamp was written can simply be repeated.
+$(_PY_TMPDIR)/.prep-done:
+       mkdir -p build $(_PY_TMPDIR)
+       ln -sfn ../build $(_PY_TMPDIR)/build
+       touch $@
+
+$(PY_EXT_MODS): PY_EXT_MODS = $@
+
+$(PY_EXT_MODS): %.so: %.c
+       $(_PY_SETUP) build_ext -f $(_PY_BUILD_ARGS)
+
+py-install:
+       $(_PY_SETUP) install --root=$(INSTALL_ROOT)
+
+.PHONY: py-build py-install py-clean
index b0c232e..507a5c2 100644 (file)
@@ -1,14 +1,40 @@
+#!/usr/bin/python
+
+import re
+import sys
+
 from distutils.core import setup, Extension
 
-# XXX - need a way to share crap with the Makefile
-setup(name = "util-vserver",
-      version = "0.1",
-      description = "Python modules for vserver operations",
-      author = "Steve Muir",
-      author_email = "smuir@cs.princeton.edu",
-      py_modules = ["vserver"],
-      ext_modules = [Extension("vserverimpl",
-                               ["vserverimpl.c"],
-                               include_dirs = ["..", "../lib"],
-                               library_dirs = ["../lib"],
-                               libraries = ["vserver"])])
+MODULE_NAME_RE = "[A-Za-z_]+"
+
+if __name__ == "__main__":
+
+    build_arg_re = re.compile(r"^([A-Z_]+)= *(.*)")
+    def split_args((build_args, argv), arg):
+        m = build_arg_re.match(arg)
+        if m:
+            (k, v) = m.groups()
+            build_args[k] = v
+        else:
+            argv += [arg]
+        return (build_args, argv)
+
+    (build_args, argv) = reduce(split_args, sys.argv[1:], ({}, []))
+    sys.argv[1:] = argv
+    extension_args = { "extra_compile_args": ["-Wall"] }
+    if "INCLUDES" in build_args:
+        extension_args["include_dirs"] = re.findall(r"-I([^ ]+)",
+                                                    build_args["INCLUDES"])
+    lib_args = build_args.get("LIBS", "")
+    if lib_args:
+        extension_args["library_dirs"] = re.findall(r"-L([^ ]+)", lib_args)
+        extension_args["libraries"] = re.findall(r"-l([^ ]+)", lib_args)
+    modules = re.findall("(%s).py" % MODULE_NAME_RE,
+                         build_args.get("PY_MODS", ""))
+    extensions = map(lambda modname: Extension(modname,
+                                               [modname + ".c"],
+                                               **extension_args),
+                     re.findall("(%s).so" % MODULE_NAME_RE,
+                                build_args.get("PY_EXT_MODS", "")))
+
+    setup(py_modules = modules, ext_modules = extensions)
diff --git a/python/vduimpl.c b/python/vduimpl.c
new file mode 100644 (file)
index 0000000..00ff5db
--- /dev/null
@@ -0,0 +1,568 @@
+/* Copyright 2005 Princeton University */
+
+#include <Python.h>
+
+#define _LARGEFILE64_SOURCE 1
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+
+
+/*
+ * hash table implementation
+ */
+
+typedef ino64_t KEY;
+typedef KEY *PKEY;
+typedef struct stat64 VAL;
+typedef VAL *PVAL;
+
+/* Hash a key by narrowing the inode number it points at to int width. */
+static inline
+unsigned int
+HASH(PKEY key){
+       int narrowed = (int) *key;
+
+       return narrowed;
+}
+
+/* Key comparison: 1 when both pointers refer to equal keys, 0 otherwise. */
+static inline
+unsigned int // boolean
+EQUAL(PKEY key1, PKEY key2){
+       if (*key1 != *key2) {
+               return 0;
+       }
+       return 1;
+}
+
+#ifndef MIN
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#endif // MIN
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y))?(x):(y))
+#endif // MAX
+
+
+/*
+ * hash table support for efficient lookup of duplicate inodes
+ */
+
+#define Multiplier  (0x9e3779b9)
+#define MaxLogBuckets  (((sizeof (unsigned long))*8) - 2)
+#define MaxBuckets     (1<<MaxLogBuckets)
+#define MinLogBuckets  (4)
+#define MinBuckets     (1<<MinLogBuckets)
+
+/* Thresholds for rehashing the table:
+ * to avoid crazy oscillations, we must have MaxDensity > 2*MinDensity;
+ * to avoid excessive probes, we must try to keep MaxDensity low.
+ * The densities below are percentages: divide by 100 before using.
+ */
+#define MaxDensity 75 /* max numEntries/NUMBER(buckets) */
+#define MinDensity 20 /* min numEntries/NUMBER(buckets) */
+#define IdealDensity 50
+#define BITSIZE(x) (sizeof(x)*8)
+
+#define NEW(type,num) ((type*)malloc(sizeof(type)*num))
+#define DISPOSE(ptr) (free((void*)ptr))
+
+/*
+ * Generic Hash Entry Type
+ */
+
+/* Untyped singly-linked list node: just the link to the next node.
+ * Bucket chains and the reuse cache are stored as lists of these. */
+typedef struct VoidList {
+       struct VoidList *tail;      // next node, or NULL at the end of the list
+} VoidList, *PVoidList;
+
+/* Chained hash table: an array of bucket lists plus sizing bookkeeping. */
+typedef struct HashTable {
+       PVoidList *buckets;         // array of numBuckets chain heads
+       unsigned int numBuckets;    // number of buckets
+       unsigned int minLogBuckets; // minimum value for Log_2(initial size) 
+       unsigned int logBuckets;    // ceiling of log2(numBuckets)
+       unsigned int maxEntries;    // maximum number of entries 
+       unsigned int minEntries;    // minimum number of entries 
+       unsigned int numEntries;    // current num of entries in table 
+       PVoidList cache;            // cache of removed elements 
+       int cacheSize;              // current size of the cache 
+       int maxCacheSize;           // maximum size, -1 means unbounded, 0 no cache 
+} HashTable, *PHashTable;
+
+/*
+ * Hash Prototypes
+ */
+
+PHashTable
+Init(PHashTable tbl, unsigned int n, int maxCacheSize);
+
+void
+Dispose(PHashTable tbl);
+
+unsigned int
+Log_2(unsigned int x);
+
+void
+NewBuckets(PHashTable tbl, unsigned int logBuckets);
+
+/*
+ * Generic Hash Table support
+ */
+
+/*
+ * Prepare tbl for use: no entries, an empty reuse cache bounded by
+ * maxCacheSize, and a bucket array sized for about n entries at
+ * IdealDensity percent occupancy (never fewer than MinBuckets).
+ * Returns tbl.
+ */
+PHashTable
+Init(PHashTable tbl, unsigned int n, int maxCacheSize){
+       int wantedBuckets;
+       int startBuckets;
+
+       /* entry count and reuse cache start out empty */
+       tbl->numEntries = 0;
+       tbl->cache = 0;
+       tbl->cacheSize = 0;
+       tbl->maxCacheSize = maxCacheSize;
+
+       /* pick the initial (and minimum) table size, then allocate it */
+       wantedBuckets = MIN(((n*100)/IdealDensity),MaxBuckets);
+       startBuckets = MAX(MinBuckets, wantedBuckets);
+       tbl->minLogBuckets = Log_2(startBuckets);
+       NewBuckets(tbl, tbl->minLogBuckets);
+
+       return tbl;
+} // Init()
+
+
+/*
+ * Internal procedures
+ */
+
+unsigned int
+Log_2(unsigned int x){
+       /* Count how many doublings of 1 are needed to reach or pass x,
+        * i.e. CEILING(LOG_2(x)), never exceeding MaxLogBuckets. */
+       unsigned int exponent;
+       unsigned int power;
+
+       assert(x != 0);
+       for (exponent = 0, power = 1;
+            (exponent < MaxLogBuckets) && (x > power);
+            exponent++) {
+               power <<= 1;
+       }
+       return exponent;
+}
+
+void
+NewBuckets(PHashTable tbl, unsigned int logBuckets){
+       /* Give tbl a fresh array of "2^logBuckets" empty buckets and derive
+        * the entry-count thresholds from the density percentages.  Any
+        * previous bucket array is simply overwritten, not freed. */
+       unsigned int count = 1 << logBuckets;
+       unsigned int slot;
+
+       tbl->buckets = NEW(PVoidList, count);
+       tbl->numBuckets = count;
+
+       slot = 0;
+       while (slot < tbl->numBuckets){
+               tbl->buckets[slot] = NULL;
+               slot++;
+       }
+
+       tbl->logBuckets = logBuckets;
+       tbl->maxEntries = MaxDensity * count / 100;
+       tbl->minEntries = MinDensity * count / 100;
+}
+
+#ifndef NULL
+#define NULL (void*)0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/*
+ * Type specific hash entry
+ */
+/* Chain node holding one (key, value) pair.  The link is the first
+ * member, matching the layout of VoidList, and the table code casts
+ * between PVoidList and PEntryList. */
+typedef struct EntryList {
+       struct EntryList *tail;   /* next entry in the same chain, or NULL */
+       KEY key;                  /* stored by value */
+       VAL val;                  /* stored by value */
+}EntryList, *PEntryList;
+
+/*
+ * Type specific Hash implementation functions
+ */
+
+/*
+ * Resize the table to 2^logBuckets buckets.
+ *
+ * NOTE: the unconditional "return;" below makes this function a no-op;
+ * everything after it is unreachable, so the table never actually
+ * changes size.  The remaining code is what the resize would do.
+ */
+static
+void
+Rehash(PHashTable tbl, unsigned int logBuckets) {
+       /* Reallocate "2^logBuckets" buckets, and rehash the entries into
+        * the new table.
+        */
+
+       PVoidList *oldBucketPointer;
+       PEntryList *ob, obi;
+       PEntryList *nb, *nbh;
+       PEntryList that, tail;
+       unsigned int index; 
+       unsigned int i;
+       unsigned int oldNumBuckets;
+
+       /* resizing is disabled: bail out before touching the table */
+       return;
+       assert(logBuckets <= MaxLogBuckets);
+       assert(logBuckets >= tbl->minLogBuckets);
+       /* remember the old array; NewBuckets() overwrites tbl->buckets */
+       oldBucketPointer = tbl->buckets;
+       ob = (PEntryList*)tbl->buckets;
+       oldNumBuckets = tbl->numBuckets;
+
+       NewBuckets(tbl, logBuckets);
+       nb = (PEntryList*)tbl->buckets;
+
+       /* move every entry of every old chain onto the head of its new chain */
+       for(i=0;i<oldNumBuckets;i++){
+               obi = ob[i];
+               that = obi;
+               while (that != NULL) {
+                       index = (HASH(&(that->key))*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets);
+                       nbh = &(nb[index]);
+                       /* save the old successor before relinking */
+                       tail = that->tail;
+                       that->tail = *nbh;
+                       *nbh = that;
+                       that = tail;
+               }
+       }
+       /* only the old bucket array is freed; the entries were reused */
+       DISPOSE (oldBucketPointer);
+}
+
+/*
+ * Look key up in tbl.  On a hit, *val is pointed at the value stored
+ * inside the table and TRUE is returned; on a miss *val is left alone
+ * and FALSE is returned.
+ */
+static inline
+unsigned int /* boolean */
+Get(PHashTable tbl, PKEY key, PVAL *val){
+       unsigned int slot;
+       PEntryList node;
+
+       slot = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets);
+       for (node = (PEntryList)tbl->buckets[slot]; node != NULL; node = node->tail) {
+               if (EQUAL(key,&(node->key))) {
+                       *val = &node->val;
+                       return TRUE;
+               }
+       }
+       return FALSE;
+}
+
+/*
+ * Insert (key, **val) unless key is already present.
+ *
+ * Returns TRUE when key was already in the table; the stored value is
+ * left untouched.  Returns FALSE when a new entry was created holding a
+ * copy of **val.  In both cases *val is redirected to the value stored
+ * inside the table.
+ *
+ * NOTE(review): a failed allocation of a new entry is not detected; the
+ * boolean return value leaves no way to report it.
+ */
+static inline
+unsigned int /* boolean */
+Put(PHashTable tbl, PKEY key, PVAL *val){
+       PEntryList that;
+       PEntryList *first;
+       unsigned int index;
+
+       index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets);
+       first = (PEntryList*)&(tbl->buckets[index]);
+
+       /* found an entry in the hash table given above key */
+       for (that = *first; that != NULL; that = that->tail) {
+               if (EQUAL(key, &(that->key))) {
+                       *val = &that->val;
+                       return TRUE;
+               }
+       }
+
+       /* check if we can reuse something from the cache */
+       if (tbl->cache != NULL) {
+               that = (PEntryList)tbl->cache;
+               tbl->cache = (PVoidList)tbl->cache->tail;
+               /* keep the count in step with the list, otherwise a bounded
+                * cache fills up once and Delete() never refills it */
+               tbl->cacheSize--;
+       }
+       else {
+               that = NEW(EntryList,1);
+       }
+
+       /* fill in the entry and link it at the head of its chain */
+       that->key = *key;
+       that->val = **val;
+       that->tail = *first;
+       *first = that;
+
+       tbl->numEntries++;
+       if ((tbl->logBuckets < MaxLogBuckets)
+           && (tbl->numEntries > tbl->maxEntries)){
+               Rehash(tbl, tbl->logBuckets + 1); /* too crowded */
+       }
+
+       *val = &that->val;
+       return FALSE;
+}
+
+/*
+ * Remove key from tbl.  Returns TRUE when an entry was removed, FALSE
+ * when key was not present.  The removed entry is parked on the reuse
+ * cache when the cache has room (maxCacheSize of -1 means unbounded)
+ * and freed otherwise.
+ */
+static inline
+int
+Delete(PHashTable tbl,PKEY key){
+       PEntryList *link;
+       PEntryList victim;
+       unsigned int slot;
+
+       slot = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets);
+
+       /* walk the chain keeping a pointer to the link that leads to the
+        * current entry, so unlinking needs no special case for the head */
+       link = (PEntryList*)&(tbl->buckets[slot]);
+       while ((*link != NULL) && !EQUAL(key, &((*link)->key))){
+               link = &((*link)->tail);
+       }
+
+       victim = *link;
+       if (victim == NULL) {
+               return FALSE;
+       }
+       *link = victim->tail;
+
+       if ((tbl->maxCacheSize != -1) && (tbl->cacheSize >= tbl->maxCacheSize)) {
+               DISPOSE (victim);
+       }
+       else {
+               victim->tail = (PEntryList)tbl->cache;
+               tbl->cache = (PVoidList)victim;
+               tbl->cacheSize++;
+       }
+
+       tbl->numEntries--;
+       /* shrinking is only attempted when the reuse cache is disabled */
+       if ((tbl->maxCacheSize == 0)
+           && (tbl->logBuckets > tbl->minLogBuckets)
+           && (tbl->numEntries < tbl->minEntries)) {
+               Rehash(tbl, tbl->logBuckets - 1); /* too sparse */
+       }
+       return TRUE;
+}
+
+typedef void (*callback)(PKEY key, PVAL val);
+
+/* Call fn(&key, &val) once for every entry in tbl, bucket by bucket. */
+void
+Iterate(PHashTable tbl, callback fn)
+{
+       unsigned int slot;
+
+       for (slot = 0; slot < tbl->numBuckets; slot++) {
+               PVoidList node;
+
+               for (node = tbl->buckets[slot]; node != (PVoidList)0; node = node->tail) {
+                       PEntryList entry = (PEntryList)node;
+
+                       fn(&entry->key,&entry->val);
+               }
+       }
+}
+
+/*
+ * Release everything the table owns: every entry on every bucket
+ * chain, every entry parked on the reuse cache, and the bucket array
+ * itself.  The HashTable structure is not freed (callers may keep it
+ * on the stack); it is left empty with no buckets.
+ */
+void
+Dispose(PHashTable tbl)
+{
+       PVoidList that, next;
+       unsigned int i;
+
+       for(i=0;i<tbl->numBuckets;i++) {
+               that = tbl->buckets[i];
+               while( that != NULL) {
+                       next = that->tail;
+                       DISPOSE (that);
+                       tbl->numEntries--;
+                       that = next;
+               }
+       }
+
+       /* entries removed by Delete() may still be waiting on the cache */
+       for (that = tbl->cache; that != NULL; that = next) {
+               next = that->tail;
+               DISPOSE (that);
+       }
+       tbl->cache = NULL;
+       tbl->cacheSize = 0;
+
+       DISPOSE(tbl->buckets);
+       tbl->buckets = NULL;
+       tbl->numBuckets = 0;
+
+       /* comparison, not assignment: "= 0" always evaluated to false */
+       assert(tbl->numEntries == 0);
+}
+
+/* Inode-keyed front end to Put(); the result is TRUE when the inode was
+ * already in the table, and *val is redirected to the stored value. */
+static int /* boolean */
+INOPut(PHashTable tbl, ino64_t* key, struct stat64 **val){
+       unsigned int seen_before = Put(tbl, key, val);
+
+       return seen_before;
+}
+
+__extension__ typedef long long                longlong;
+
+/* Running totals that vdu_onedir() adds to for each directory it visits. */
+struct stats {
+       longlong inodes;        /* number of directory entries counted */
+       longlong blocks;        /* sum of st_blocks */
+       longlong size;          /* sum of st_size */
+};
+
+static short verbose = 0;
+
+/*
+ * Add the usage of the directory tree rooted at "path" to *__s.
+ *
+ * path is opened relative to the current working directory, and the
+ * function changes the working directory as it descends; it does not
+ * restore the directory that was current on entry.
+ *
+ * Every entry that can be lstat'ed counts as one inode.  Sizes and
+ * blocks are added for regular files and for subdirectories on the
+ * same device as this directory (which are then descended into).  A
+ * regular file with several hard links is only accounted for once all
+ * of its links have been seen; tbl tracks the remaining link count per
+ * inode.
+ *
+ * Returns 0 on success and -1 on failure.  When this directory itself
+ * fails, its totals are not added to *__s, the descriptors opened here
+ * are released and errno describes the failing call.
+ */
+static int vdu_onedir (PHashTable tbl, struct stats *__s, char const *path)
+{
+       struct stat64 dirst, st;
+       struct dirent *ent;
+       char *name;
+       DIR *dir = NULL;
+       int dirfd;
+       int res = 0;
+       int saved_errno;
+       longlong dirsize, dirinodes, dirblocks;
+
+       dirsize = dirinodes = dirblocks = 0;
+
+       // A handle to speed up chdir
+       if ((dirfd = open (path,O_RDONLY)) == -1) {
+               return -1;
+       }
+
+       if (fchdir (dirfd) == -1) {
+               goto fail;
+       }
+
+       if (fstat64 (dirfd,&dirst) != 0) {
+               goto fail;
+       }
+
+       if ((dir = opendir (".")) == NULL) {
+               goto fail;
+       }
+
+       /* Walk the directory entries and compute the sum of inodes,
+        * blocks, and disk space used. This code will recursively descend
+        * down the directory structure. 
+        */
+
+       while ((ent=readdir(dir))!=NULL){
+               if (lstat64(ent->d_name,&st)==-1){
+                       continue;
+               }
+       
+               dirinodes ++;
+
+               if (S_ISREG(st.st_mode)){
+                       if (st.st_nlink > 1){
+                               struct stat64 *val;
+                               int nlink;
+
+                               /* Check hash table if we've seen this inode
+                                * before. Note that the hash maintains a
+                                * (inode,struct stat) key value pair.
+                                */
+
+                               val = &st;
+
+                               (void) INOPut(tbl,&st.st_ino,&val);
+
+                               /* Note that after the INOPut call "val" refers to the
+                                * value entry in the hash table --- not &st.  This
+                                * means that if the inode has been put into the hash
+                                * table before, val will refer to the first st that
+                                * was put into the hashtable.  Otherwise, if it is
+                                * the first time it is put into the hash table, then
+                                * val will refer to a copy of this st.
+                                */
+                               nlink = val->st_nlink;
+                               nlink --;
+
+                               /* val refers to value in hash table */
+                               if (nlink == 0) {
+
+                                       /* We saw all hard links to this particular inode
+                                        * as part of this sweep of vdu. So account for
+                                        * the size and blocks required by the file.
+                                        */
+
+                                       dirsize += val->st_size;
+                                       dirblocks += val->st_blocks;
+
+                                       /* Do not delete the (ino,val) tuple from the tbl,
+                                        * as we need to handle the case when we are
+                                        * double counting a file due to a bind mount.
+                                        */
+                                       val->st_nlink = 0;
+
+                               } else if (nlink > 0) {
+                                       val->st_nlink = nlink;
+                               } else /* if(nlink < 0) */ {
+                                       /* We get here when we are double counting nlinks
+                                          due a bind mount. */
+
+                                       /* DO NOTHING */
+                               }
+                       } else {
+                               dirsize += st.st_size;
+                               dirblocks += st.st_blocks;
+                       }
+
+               } else if (S_ISDIR(st.st_mode)) {
+                       if ((st.st_dev == dirst.st_dev) &&
+                           (strcmp(ent->d_name,".")!=0) &&
+                           (strcmp(ent->d_name,"..")!=0)) {
+
+                               dirsize += st.st_size;
+                               dirblocks += st.st_blocks;
+
+                               name = strdup(ent->d_name);
+                               if (name==0) {
+                                       goto fail;
+                               }
+                               res |= vdu_onedir(tbl,__s,name);
+                               free(name);
+
+                               /* The recursion moved the working directory.  If
+                                * we cannot get back, the remaining names would be
+                                * resolved against the wrong directory, so stop.
+                                */
+                               if (fchdir(dirfd) == -1) {
+                                       goto fail;
+                               }
+                       }
+               } else {
+                       /* dirsize += st.st_size; */
+                       /* dirblocks += st.st_blocks; */
+               }
+       }
+       closedir (dir);
+       close (dirfd);
+       __s->inodes += dirinodes;
+       __s->blocks += dirblocks;
+       __s->size   += dirsize;
+       if (verbose) {
+               printf("%16lld %16lld %16lld %s\n",dirinodes, dirblocks, dirsize,path);
+               printf("%16lld %16lld %16lld %s\n",__s->inodes, __s->blocks, __s->size,path);
+       }
+
+       return res;
+
+fail:
+       /* release what was opened here without losing the errno of the
+        * call that failed; the caller reports it */
+       saved_errno = errno;
+       if (dir != NULL) {
+               closedir (dir);
+       }
+       close (dirfd);
+       errno = saved_errno;
+       return -1;
+}
+
+
+/*
+ * Python entry point: vdu(path) -> (inodes, blocks, size).
+ *
+ * Takes one string argument, walks that directory tree with
+ * vdu_onedir() and returns the three totals as a tuple.  Raises
+ * OSError (built from errno) when the walk reports failure.  Returns
+ * NULL with an exception set on any error, as the C API requires.
+ */
+static PyObject *
+do_vdu(PyObject *self, PyObject *args)
+{
+       const char *path;
+       int res;
+       int saved_errno;
+       struct stats s;
+       HashTable tbl;
+
+       /* PyArg_ParseTuple has already set the exception on failure */
+       if (!PyArg_ParseTuple(args, "s", &path))
+               return NULL;
+
+       /* init of tbl and stats */
+       s.inodes = s.blocks = s.size = 0;
+       (void) Init(&tbl,0,0);
+
+       res = vdu_onedir(&tbl, &s, path);
+       /* capture errno before the cleanup below can disturb it */
+       saved_errno = errno;
+
+       /* deallocate whatever has been added to tbl */
+       Dispose(&tbl);
+
+       if (res == -1) {
+               errno = saved_errno;
+               return PyErr_SetFromErrno(PyExc_OSError);
+       }
+
+       /* create a python (inode, block, size) tuple; built only on
+        * success so nothing is leaked on the error path */
+       return Py_BuildValue("(L,L,L)",s.inodes,s.blocks,s.size);
+}
+
+/* Method table of the module: a single function, vdu, implemented by
+ * do_vdu and taking positional arguments; the all-NULL row ends the
+ * table. */
+static PyMethodDef  methods[] = {
+       { "vdu", do_vdu, METH_VARARGS,
+         "perform vdu operation on directory tree" },
+       { NULL, NULL, 0, NULL }
+};
+
+/* Module initialisation function: creates the module "vduimpl" with
+ * the method table above. */
+PyMODINIT_FUNC
+initvduimpl(void)
+{
+       Py_InitModule("vduimpl", methods);
+}
index ef25570..4350d8c 100644 (file)
@@ -1,6 +1,6 @@
 %define name util-vserver
 %define version 0.30
-%define release 12.planetlab%{?date:.%{date}}
+%define release 13.planetlab%{?date:.%{date}}
 
 Vendor: PlanetLab
 Packager: PlanetLab Central <support@planet-lab.org>
@@ -162,6 +162,7 @@ but more readily accessible from Python code.
 %files py23
 %defattr(0644,root,root)
 /usr/lib/python2.3/site-packages/util_vserver_vars.py
+/usr/lib/python2.3/site-packages/vduimpl.so
 /usr/lib/python2.3/site-packages/vserver.py
 /usr/lib/python2.3/site-packages/vserver.pyc
 /usr/lib/python2.3/site-packages/vserverimpl.so
@@ -169,6 +170,9 @@ but more readily accessible from Python code.
 
 
 %changelog
+* Mon Jun 20 2005 Steve Muir <smuir@cs.princeton.edu>
+- import Marc's vdu implementation
+
 * Wed Jun 15 2005 Steve Muir <smuir@cs.princeton.edu>
 - 'vserver-init start' functionality subsumed by Node Manager