X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=python%2Fvduimpl.c;fp=python%2Fvduimpl.c;h=21918260c88193a03341337f861de71ba7cd291a;hb=0c1f40415d91b68e560aaf31b390d106b12998e5;hp=0000000000000000000000000000000000000000;hpb=8cf13bb177d92c93eb73dc8939777150536c2d00;p=util-vserver.git diff --git a/python/vduimpl.c b/python/vduimpl.c new file mode 100644 index 0000000..2191826 --- /dev/null +++ b/python/vduimpl.c @@ -0,0 +1,578 @@ +/* Copyright 2005 Princeton University */ + +#include + +#define _LARGEFILE64_SOURCE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * hash table implementation + */ + +typedef ino64_t KEY; +typedef KEY *PKEY; +typedef struct stat64 VAL; +typedef VAL *PVAL; + +static inline +unsigned int +HASH(PKEY key){ + return (int) *key; +} + +static inline +unsigned int // boolean +EQUAL(PKEY key1, PKEY key2){ + return *key1 == *key2; +} + +#ifndef MIN +#define MIN(x,y) (((x)<(y))?(x):(y)) +#endif // MIN + +#ifndef MAX +#define MAX(x,y) (((x)>(y))?(x):(y)) +#endif // MAX + + +/* + * hash table support for efficient lookup of duplicate inodes + */ + +#define Multiplier (0x9e3779b9) +#define MaxLogBuckets (((sizeof (unsigned long))*8) - 2) +#define MaxBuckets (1< 2*MinDensity; *) + * to avoid excessive probes, we must try to keep MaxDensity low. *) + * Divide by 100 before using + */ +#define MaxDensity 75 /* max numEntries/NUMBER(buckets) */ +#define MinDensity 20 /* min numEntries/NUMBER(buckets) */ +#define IdealDensity 50 +#define BITSIZE(x) (sizeof(x)*8) + +#define NEW(type,num) ((type*)malloc(sizeof(type)*num)) +#define DISPOSE(ptr) (free((void*)ptr)) + +/* + * Generic Hash Entry Type + */ + +typedef struct VoidList { + struct VoidList *tail; +} VoidList, *PVoidList; + +typedef struct HashTable { + PVoidList *buckets; + unsigned int numBuckets; // number of buckets + unsigned int minLogBuckets; // minimum value for Log_2(initial size) + unsigned int logBuckets; // CEILING(Log2(NUMBER(buckets^))) + unsigned int maxEntries; // maximum number of entries + unsigned int minEntries; // minimum number of entries + unsigned int numEntries; // current num of entries in table + PVoidList cache; // cache of removed elements + int cacheSize; // current size of the cache + int maxCacheSize; // maximum size, -1 means unbounded, 0 no cache +} HashTable, *PHashTable; + +/* + * Hash Prototypes + */ + +PHashTable +Init(PHashTable tbl, unsigned int n, int maxCacheSize); + +void +Dispose(PHashTable tbl); + +unsigned int +Log_2(unsigned int x); + +void +NewBuckets(PHashTable tbl, unsigned int logBuckets); + +/* + * Generic Hash Table support + */ + +PHashTable +Init(PHashTable tbl, unsigned int n, int maxCacheSize){ + int idealBuckets; + int minBuckets; + + idealBuckets = MIN(((n*100)/IdealDensity),MaxBuckets); + minBuckets = MAX(MinBuckets, idealBuckets); + tbl->minLogBuckets = Log_2(minBuckets); + + NewBuckets(tbl, tbl->minLogBuckets); + tbl->numEntries = 0; + tbl->maxCacheSize = maxCacheSize; + tbl->cacheSize = 0; + tbl->cache = 0; + return tbl; +} // Init() + + +/* + * Internal procedures + */ + +unsigned int +Log_2(unsigned int x){ + /* return CEILING(LOG_2(x)) */ + unsigned int log = 0; + unsigned int n= 1; + + assert(x != 0); + while ((log < MaxLogBuckets) && (x > n)){ + log++; + n += n; + } + return log; +} + +void +NewBuckets(PHashTable tbl, unsigned int logBuckets){ + /* Allocate "2^logBuckets" buckets. */ + unsigned int numBuckets = 1 << logBuckets; + PVoidList *b; + unsigned int i; + + tbl->buckets = NEW(PVoidList, numBuckets); + tbl->numBuckets = numBuckets; + b = tbl->buckets; + + for (i=0; inumBuckets; i++){ + b[i] = NULL; + } + tbl->logBuckets = logBuckets; + tbl->maxEntries = MaxDensity * numBuckets / 100; + tbl->minEntries = MinDensity * numBuckets / 100; +} + +#ifndef NULL +#define NULL (void*)0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/* + * Type specific hash entry + */ +typedef struct EntryList { + struct EntryList *tail; + KEY key; + VAL val; +}EntryList, *PEntryList; + +/* + * Type specific Hash implementation functions + */ + +static +void +Rehash(PHashTable tbl, unsigned int logBuckets) { + /* Reallocate "2^logBuckets" buckets, and rehash the entries into + * the new table. + */ + + PVoidList *oldBucketPointer; + PEntryList *ob, obi; + PEntryList *nb, *nbh; + PEntryList that, tail; + unsigned int index; + unsigned int i; + unsigned int oldNumBuckets; + + return; + assert(logBuckets <= MaxLogBuckets); + assert(logBuckets >= tbl->minLogBuckets); + oldBucketPointer = tbl->buckets; + ob = (PEntryList*)tbl->buckets; + oldNumBuckets = tbl->numBuckets; + + NewBuckets(tbl, logBuckets); + nb = (PEntryList*)tbl->buckets; + + for(i=0;ikey))*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + nbh = &(nb[index]); + tail = that->tail; + that->tail = *nbh; + *nbh = that; + that = tail; + } + } + DISPOSE (oldBucketPointer); +} + +static inline +unsigned int /* boolean */ +Get(PHashTable tbl, PKEY key, PVAL *val){ + PEntryList that; + unsigned int index; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + that = (PEntryList)tbl->buckets[index]; + while ((that != NULL) && !EQUAL(key,&(that->key))) { + that = that->tail; + } + if (that != NULL){ + *val = &that->val; + return TRUE; + } + else { + return FALSE; + } +} + +static inline +unsigned int /* boolean */ +Put(PHashTable tbl, PKEY key, PVAL *val){ + PEntryList that; + PEntryList *first; + unsigned int index; + unsigned int res; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + first = (PEntryList*)&(tbl->buckets[index]); + that = *first; + while ((that != NULL) && !EQUAL(key, &(that->key))){ + that = that->tail; + } + + /* found an entry in the hash table given above key */ + if (that != NULL){ + res = TRUE; + } + else { + /* check if we can reuse something from the cache */ + if (tbl->cache != NULL) { + that = (PEntryList)tbl->cache; + tbl->cache = (PVoidList)tbl->cache->tail; + that->key = *key; + that->tail = *first; + *first = that; + } + else { + that = NEW(EntryList,1); + that->key = *key; + that->tail = *first; + *first = that; + } + that->val = **val; + + tbl->numEntries++; + if ((tbl->logBuckets < MaxLogBuckets) + && (tbl->numEntries > tbl->maxEntries)){ + Rehash(tbl, tbl->logBuckets + 1); /* too crowded */ + } + res = FALSE; + } + *val = &that->val; + return res; + +} + +static inline +int +Delete(PHashTable tbl,PKEY key){ + PEntryList that, prev; + PEntryList *first; + unsigned int index; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + first = (PEntryList*)&(tbl->buckets[index]); + that = *first; + prev = NULL; + + while ((that != NULL) && !EQUAL(key, &(that->key))){ + prev = that; + that = that->tail; + } + if (that != NULL) { + if (prev == NULL) { + *first = that->tail; + } + else { + prev->tail = that->tail; + } + if ((tbl->maxCacheSize == -1)||(tbl->cacheSize < tbl->maxCacheSize)) { + that->tail = (PEntryList)tbl->cache; + tbl->cache = (PVoidList)that; + tbl->cacheSize++; + } + else { + DISPOSE (that); + } + tbl->numEntries--; + if (tbl->maxCacheSize == 0) { + if ((tbl->logBuckets > tbl->minLogBuckets) + && (tbl->numEntries < tbl->minEntries)) { + Rehash(tbl, tbl->logBuckets - 1); /* too sparse */ + } + } + return TRUE; + } + else { + return FALSE; + } +} + +typedef void (*callback)(PKEY key, PVAL val); + +void +Iterate(PHashTable tbl, callback fn) +{ + PVoidList that; + unsigned int i; + + for(i=0;inumBuckets;i++) { + that = tbl->buckets[i]; + while ( that != (PVoidList)0 ) { + PEntryList entry = (PEntryList)that; + fn(&entry->key,&entry->val); + that = that->tail; + } + } +} + +void +Dispose(PHashTable tbl) +{ + PVoidList that, next; + unsigned int i; + + for(i=0;inumBuckets;i++) { + that = tbl->buckets[i]; + while( that != NULL) { + next = that->tail; + DISPOSE (that); + tbl->numEntries--; + that = next; + } + } + DISPOSE(tbl->buckets); + assert(tbl->numEntries = 0); +} + +static int /* boolean */ +INOPut(PHashTable tbl, ino64_t* key, struct stat64 **val){ + return Put(tbl, key, val); +} + +__extension__ typedef long long longlong; + +struct stats { + longlong inodes; + longlong blocks; + longlong size; +}; + +static short verbose = 0; + +static int vdu_onedir (PHashTable tbl, struct stats *__s, char const *path) +{ + char const *foo = path; + struct stat64 dirst, st; + struct dirent *ent; + char *name; + DIR *dir; + int dirfd; + int res = 0; + longlong dirsize, dirinodes, dirblocks; + + dirsize = dirinodes = dirblocks = 0; + + // A handle to speed up chdir + if ((dirfd = open (path,O_RDONLY)) == -1) { + return -1; + } + + if (fchdir (dirfd) == -1) { + return -1; + } + + if (fstat64 (dirfd,&dirst) != 0) { + return -1; + } + + if ((dir = opendir (".")) == NULL) { + return -1; + } + + /* Walk the directory entries and compute the sum of inodes, + * blocks, and disk space used. This code will recursively descend + * down the directory structure. + */ + + while ((ent=readdir(dir))!=NULL){ + if (lstat64(ent->d_name,&st)==-1){ + continue; + } + + dirinodes ++; + + if (S_ISREG(st.st_mode)){ + if (st.st_nlink > 1){ + struct stat64 *val; + int nlink; + + /* Check hash table if we've seen this inode + * before. Note that the hash maintains a + * (inode,struct stat) key value pair. + */ + + val = &st; + + (void) INOPut(tbl,&st.st_ino,&val); + + /* Note that after the INOPut call "val" refers to the + * value entry in the hash table --- not &st. This + * means that if the inode has been put into the hash + * table before, val will refer to the first st that + * was put into the hashtable. Otherwise, if it is + * the first time it is put into the hash table, then + * val will be equal to this &st. + */ + nlink = val->st_nlink; + nlink --; + + /* val refers to value in hash tbale */ + if (nlink == 0) { + + /* We saw all hard links to this particular inode + * as part of this sweep of vdu. So account for + * the size and blocks required by the file. + */ + + dirsize += val->st_size; + dirblocks += val->st_blocks; + + /* Do not delete the (ino,val) tuple from the tbl, + * as we need to handle the case when we are + * double counting a file due to a bind mount. + */ + val->st_nlink = 0; + + } else if (nlink > 0) { + val->st_nlink = nlink; + } else /* if(nlink < 0) */ { + /* We get here when we are double counting nlinks + due a bind mount. */ + + /* DO NOTHING */ + } + } else { + dirsize += st.st_size; + dirblocks += st.st_blocks; + } + + } else if (S_ISDIR(st.st_mode)) { + if ((st.st_dev == dirst.st_dev) && + (strcmp(ent->d_name,".")!=0) && + (strcmp(ent->d_name,"..")!=0)) { + + dirsize += st.st_size; + dirblocks += st.st_blocks; + + name = strdup(ent->d_name); + if (name==0) { + return -1; + } + res |= vdu_onedir(tbl,__s,name); + free(name); + fchdir(dirfd); + } + } else { + /* dirsize += st.st_size; */ + /* dirblocks += st.st_blocks; */ + } + } + closedir (dir); + close (dirfd); + __s->inodes += dirinodes; + __s->blocks += dirblocks; + __s->size += dirsize; + if (verbose) { + printf("%16lld %16lld %16lld %s\n",dirinodes, dirblocks, dirsize,foo); + printf("%16lld %16lld %16lld %s\n",__s->inodes, __s->blocks, __s->size,foo); + } + + return res; +} + + +static PyObject * +do_vdu(PyObject *self, PyObject *args) +{ + PyObject *tuple; + + const char *path; + int res; + struct stats s; + HashTable tbl; + int cwd_fd; + + if (!PyArg_ParseTuple(args, "s", &path)) + return Py_None; + + /* init of tbl and stats */ + s.inodes = s.blocks = s.size = 0; + (void) Init(&tbl,0,0); + + cwd_fd = open(".", O_RDONLY); + res = vdu_onedir(&tbl, &s, path); + fchdir(cwd_fd); + close(cwd_fd); + + /* deallocate whatever has been added to tbl */ + Dispose(&tbl); + + /* create a python (inode, block, size) tuple */ + tuple = Py_BuildValue("(L,L,L)", + s.inodes, + s.blocks>>1, /* NOTE: div by 2 to adjust + * 512b block count to 1K + * block count + */ + s.size); + return (res == -1) ? PyErr_SetFromErrno(PyExc_OSError) : tuple; +} + +static PyMethodDef methods[] = { + { "vdu", do_vdu, METH_VARARGS, + "perform vdu operation on directory tree" }, + { NULL, NULL, 0, NULL } +}; + +PyMODINIT_FUNC +initvduimpl(void) +{ + Py_InitModule("vduimpl", methods); +}