From: Marc Fiuczynski Date: Tue, 16 Nov 2004 04:51:30 +0000 (+0000) Subject: PL3026: This is the upgraded version of vdu that maintains an internal X-Git-Tag: after-util-vserver-0_30_208-revert~211 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;ds=sidebyside;h=02ca3cb72401c1bfb9361c8ea340d065a6d9a377;p=util-vserver.git PL3026: This is the upgraded version of vdu that maintains an internal hash table of files with a nlink count > 1. Only if vdu sees all hard links to a particular inode does it add its size and block count to the total. --- diff --git a/src/vdu.c b/src/vdu.c index 04c060c..19b5dec 100644 --- a/src/vdu.c +++ b/src/vdu.c @@ -1,4 +1,4 @@ -// $Id: vdu.c,v 1.1 2003/09/29 22:01:57 ensc Exp $ +// $Id: vdu-new.c,v 1.2 2004/08/17 14:44:14 mef-pl_kernel Exp $ // Copyright (C) 2003 Enrico Scholz // based on vdu.cc by Jacques Gelinas @@ -17,11 +17,6 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -#ifdef HAVE_CONFIG_H -# include -#endif -#include "compat.h" - #include #include #include @@ -31,19 +26,21 @@ #include #include #include - #include -#include "ext2fs.h" -// Patch to help compile this utility on unpatched kernel source -#ifndef EXT2_IMMUTABLE_FILE_FL - #define EXT2_IMMUTABLE_FILE_FL 0x00000010 - #define EXT2_IMMUTABLE_LINK_FL 0x00008000 -#endif +#include + +#include "vdu.h" +HashTable tbl; + +static int // boolean +INOPut(PHashTable tbl, ino_t* key, struct stat **val){ + return Put(tbl, key, val); +} -//__extension__ typedef long long longlong; -__extension__ typedef long longlong; +__extension__ typedef long long longlong; +//__extension__ typedef long longlong; static longlong inodes; static longlong blocks; @@ -62,6 +59,7 @@ void panic(char *s) { static void vdu_onedir (char const *path) { + char const *foo = path; struct stat dirst, st; struct dirent *ent; char *name; @@ -94,8 +92,9 @@ static void vdu_onedir (char const *path) /* Walk the directory entries and compute the sum of inodes, - blocks, and disk space used. This code will recursively descend - down the directory structure. */ + * blocks, and disk space used. This code will recursively descend + * down the directory structure. + */ while ((ent=readdir(dir))!=NULL){ if (lstat(ent->d_name,&st)==-1){ @@ -108,27 +107,58 @@ static void vdu_onedir (char const *path) if (S_ISREG(st.st_mode)){ if (st.st_nlink > 1){ - long flags; - int fd, res; - - if ((fd = open(ent->d_name,O_RDONLY))==-1) { - fprintf (stderr,"Can't open file %s/%s\n",path,ent->d_name); - warning ("open failed"); - continue; - } - - flags = 0; - res = ioctl(fd, EXT2_IOC_GETFLAGS, &flags); - close(fd); - - if ((res == 0) && (flags & EXT2_IMMUTABLE_LINK_FL)){ - if (verbose) - printf ("Skipping %s\n",ent->d_name); - continue; + struct stat *val; + int nlink; + + /* Check hash table if we've seen this inode + * before. Note that the hash maintains a + * (inode,struct stat) key value pair. + */ + + val = &st; + + (void) INOPut(&tbl,&st.st_ino,&val); + + /* Note that after the INOPut call "val" refers to the + * value entry in the hash table --- not &st. This + * means that if the inode has been put into the hash + * table before, val will refer to the first st that + * was put into the hashtable. Otherwise, if it is + * the first time it is put into the hash table, then + * val will be equal to this &st. + */ + nlink = val->st_nlink; + nlink --; + + /* val refers to value in hash tbale */ + if (nlink == 0) { + + /* We saw all hard links to this particular inode + * as part of this sweep of vdu. So account for + * the size and blocks required by the file. + */ + + dirsize += val->st_size; + dirblocks += val->st_blocks; + + /* Do not delete the (ino,val) tuple from the tbl, + * as we need to handle the case when we are + * double counting a file due to a bind mount. + */ + val->st_nlink = 0; + + } else if (nlink > 0) { + val->st_nlink = nlink; + } else /* if(nlink < 0) */ { + /* We get here when we are double counting nlinks + due a bind mount. */ + + /* DO NOTHING */ } + } else { + dirsize += st.st_size; + dirblocks += st.st_blocks; } - dirsize += st.st_size; - dirblocks += st.st_blocks; } else if (S_ISDIR(st.st_mode)) { if ((st.st_dev == dirst.st_dev) && @@ -147,43 +177,71 @@ static void vdu_onedir (char const *path) fchdir(dirfd); } } else { - dirsize += st.st_size; - dirblocks += st.st_blocks; + // dirsize += st.st_size; + // dirblocks += st.st_blocks; } } closedir (dir); close (dirfd); if (verbose) - printf("%8ld %8ld %8ld %s\n",dirinodes, dirblocks, dirsize>>10,path); + printf("%16lld %16lld %16lld %s\n",dirinodes, dirblocks, dirsize,foo); inodes += dirinodes; blocks += dirblocks; size += dirsize; } +static void +Count(ino_t* key, struct stat* val) { + if(val->st_nlink) { + blocks += val->st_blocks; + size += val->st_size; + printf("ino=%ld nlink=%d\n",val->st_ino, val->st_nlink); + } +} - -int main (int argc, char *argv[]) +int +main (int argc, char **argv) { int startdir, i; if (argc < 2){ fprintf (stderr,"vdu version %s\n",VERSION); fprintf (stderr,"vdu directory ...\n\n"); - fprintf (stderr,"Compute the size of a directory tree."); + fprintf (stderr,"Compute the size of a directory tree.\n"); }else{ if ((startdir = open (".",O_RDONLY)) == -1) { fprintf (stderr,"Can't open current working directory\n"); panic("open failed"); } + /* hash table support for hard link count */ + (void) Init(&tbl,0,0); + for (i=1; i>10,argv[i]); + + printf("%16lld %16lld %16lld %s\n", + inodes, + blocks>>1, + size, + argv[i]); if (fchdir (startdir) == -1) { panic("fchdir failed"); } } + + if(0) { + /* show size & blocks for files with nlinks from outside of dir */ + inodes = blocks = size = 0; + Iterate(&tbl,Count); + printf("%16lld %16lld %16lld NOT COUNTED\n", + inodes, + blocks, + size); + } + + // Dispose(&tbl); this fails to delete all entries close(startdir); } return 0; diff --git a/src/vdu.h b/src/vdu.h new file mode 100644 index 0000000..13bc979 --- /dev/null +++ b/src/vdu.h @@ -0,0 +1,374 @@ +#ifndef __VDU_H +#define __VDU_H + +typedef ino_t KEY; +typedef KEY *PKEY; +typedef struct stat VAL; +typedef VAL *PVAL; + +static inline +unsigned int +HASH(PKEY key){ + return (int) *key; +} + +static inline +unsigned int // boolean +EQUAL(PKEY key1, PKEY key2){ + return *key1 == *key2; +} + +#ifndef MIN +#define MIN(x,y) (((x)<(y))?(x):(y)) +#endif // MIN + +#ifndef MAX +#define MAX(x,y) (((x)>(y))?(x):(y)) +#endif // MAX + + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +// hash table support for efficient lookup of duplicate inodes */ +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// + +#define Multiplier (0x9e3779b9) +#define MaxLogBuckets (((sizeof (unsigned long))*8) - 2) +#define MaxBuckets (1< 2*MinDensity; *) +// to avoid excessive probes, we must try to keep MaxDensity low. *) +// Divide by 100 before using +#define MaxDensity 75 /* max numEntries/NUMBER(buckets) */ +#define MinDensity 20 /* min numEntries/NUMBER(buckets) */ +#define IdealDensity 50 +#define BITSIZE(x) (sizeof(x)*8) + +#define NEW(type,num) ((type*)malloc(sizeof(type)*num)) +#define DISPOSE(ptr) (free((void*)ptr)) + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +// Generic Hash Entry Type +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// + +typedef struct VoidList { + struct VoidList *tail; +} VoidList, *PVoidList; + +typedef struct HashTable { + PVoidList *buckets; + unsigned int numBuckets; // number of buckets + unsigned int minLogBuckets; // minimum value for Log_2(initial size) + unsigned int logBuckets; // CEILING(Log2(NUMBER(buckets^))) + unsigned int maxEntries; // maximum number of entries + unsigned int minEntries; // minimum number of entries + unsigned int numEntries; // current num of entries in table + PVoidList cache; // cache of removed elements + int cacheSize; // current size of the cache + int maxCacheSize; // maximum size, -1 means unbounded, 0 no cache +} HashTable, *PHashTable; + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +// Hash Prototypes +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// + +PHashTable +Init(PHashTable tbl, unsigned int n, int maxCacheSize); + +void +Dispose(PHashTable tbl); + +unsigned int +Log_2(unsigned int x); + +void +NewBuckets(PHashTable tbl, unsigned int logBuckets); + +// +// Generic Hash Table support +// + +PHashTable +Init(PHashTable tbl, unsigned int n, int maxCacheSize){ + int idealBuckets; + int minBuckets; + + idealBuckets = MIN(((n*100)/IdealDensity),MaxBuckets); + minBuckets = MAX(MinBuckets, idealBuckets); + tbl->minLogBuckets = Log_2(minBuckets); + + NewBuckets(tbl, tbl->minLogBuckets); + tbl->numEntries = 0; + tbl->maxCacheSize = maxCacheSize; + tbl->cacheSize = 0; + tbl->cache = 0; + return tbl; +} // Init() + + +// +// Internal procedures +// + +unsigned int +Log_2(unsigned int x){ + // return CEILING(LOG_2(x)) + unsigned int log = 0; + unsigned int n= 1; + + assert(x != 0); + while ((log < MaxLogBuckets) && (x > n)){ + log++; + n += n; + } + return log; +} // Log_2() + +void +NewBuckets(PHashTable tbl, unsigned int logBuckets){ + // Allocate "2^logBuckets" buckets. + unsigned int numBuckets = 1 << logBuckets; + PVoidList *b; + unsigned int i; + + tbl->buckets = NEW(PVoidList, numBuckets); + tbl->numBuckets = numBuckets; + b = tbl->buckets; + + for (i=0; inumBuckets; i++){ + b[i] = NULL; + } + tbl->logBuckets = logBuckets; + tbl->maxEntries = MaxDensity * numBuckets / 100; + tbl->minEntries = MinDensity * numBuckets / 100; +} // NewBuckets() + +#ifndef NULL +#define NULL (void*)0 +#endif // !NULL + +#ifndef TRUE +#define TRUE 1 +#endif // !TRUE + +#ifndef FALSE +#define FALSE 0 +#endif // !FALSE + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +// Type specific hash entry +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +typedef struct EntryList { + struct EntryList *tail; + KEY key; + VAL val; +}EntryList, *PEntryList; + +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// +// Type specific Hash implementation functions +//////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////// + +static +void +Rehash(PHashTable tbl, unsigned int logBuckets) { + // Reallocate "2^logBuckets" buckets, and rehash the entries into + // the new table. + + PVoidList *oldBucketPointer; + PEntryList *ob, obi; + PEntryList *nb, *nbh; + PEntryList that, tail; + unsigned int index; + unsigned int i; + unsigned int oldNumBuckets; + + return; + assert(logBuckets <= MaxLogBuckets); + assert(logBuckets >= tbl->minLogBuckets); + oldBucketPointer = tbl->buckets; + ob = (PEntryList*)tbl->buckets; + oldNumBuckets = tbl->numBuckets; + + NewBuckets(tbl, logBuckets); + nb = (PEntryList*)tbl->buckets; + + for(i=0;ikey))*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + nbh = &(nb[index]); + tail = that->tail; + that->tail = *nbh; + *nbh = that; + that = tail; + } + } + DISPOSE (oldBucketPointer); +} + +static inline +unsigned int // boolean +Get(PHashTable tbl, PKEY key, PVAL *val){ + PEntryList that; + unsigned int index; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + that = (PEntryList)tbl->buckets[index]; + while ((that != NULL) && !EQUAL(key,&(that->key))) { + that = that->tail; + } + if (that != NULL){ + *val = &that->val; + return TRUE; + } + else { + return FALSE; + } +} // Get() + +static inline +unsigned int // boolean +Put(PHashTable tbl, PKEY key, PVAL *val){ + PEntryList that; + PEntryList *first; + unsigned int index; + unsigned int res; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + first = (PEntryList*)&(tbl->buckets[index]); + that = *first; + while ((that != NULL) && !EQUAL(key, &(that->key))){ + that = that->tail; + } + + // found an entry in the hash table given above key + if (that != NULL){ + res = TRUE; + } + else { + // check if we can reuse something from the cache + if (tbl->cache != NULL) { + that = (PEntryList)tbl->cache; + tbl->cache = (PVoidList)tbl->cache->tail; + that->key = *key; + that->tail = *first; + *first = that; + } + else { + that = NEW(EntryList,1); + that->key = *key; + that->tail = *first; + *first = that; + } + that->val = **val; + + tbl->numEntries++; + if ((tbl->logBuckets < MaxLogBuckets) + && (tbl->numEntries > tbl->maxEntries)){ + Rehash(tbl, tbl->logBuckets + 1); // too crowded + } + res = FALSE; + } + *val = &that->val; + return res; + +} // Put() + +static inline +int +Delete(PHashTable tbl,PKEY key){ + PEntryList that, prev; + PEntryList *first; + unsigned int index; + + index = (HASH(key)*Multiplier)>>(BITSIZE(unsigned long)-tbl->logBuckets); + first = (PEntryList*)&(tbl->buckets[index]); + that = *first; + prev = NULL; + + while ((that != NULL) && !EQUAL(key, &(that->key))){ + prev = that; + that = that->tail; + } + if (that != NULL) { + if (prev == NULL) { + *first = that->tail; + } + else { + prev->tail = that->tail; + } + if ((tbl->maxCacheSize == -1)||(tbl->cacheSize < tbl->maxCacheSize)) { + that->tail = (PEntryList)tbl->cache; + tbl->cache = (PVoidList)that; + tbl->cacheSize++; + } + else { + DISPOSE (that); + } + tbl->numEntries--; + if (tbl->maxCacheSize == 0) { + if ((tbl->logBuckets > tbl->minLogBuckets) + && (tbl->numEntries < tbl->minEntries)) { + Rehash(tbl, tbl->logBuckets - 1); // too sparse + } + } + return TRUE; + } + else { + return FALSE; + } +} // Delete() + +typedef void (*callback)(PKEY key, PVAL val); + +void +Iterate(PHashTable tbl, callback fn) +{ + PVoidList that; + unsigned int i; + + for(i=0;inumBuckets;i++) { + that = tbl->buckets[i]; + while ( that != (PVoidList)0 ) { + PEntryList entry = (PEntryList)that; + fn(&entry->key,&entry->val); + that = that->tail; + } + } +} + +void +Dispose(PHashTable tbl) +{ + PVoidList that, next; + unsigned int i; + + for(i=0;inumBuckets;i++) { + that = tbl->buckets[i]; + while( that != NULL) { + next = that->tail; + DISPOSE (that); + tbl->numEntries--; + that = next; + } + } + DISPOSE(tbl->buckets); + assert(tbl->numEntries = 0); +} // Dispose; + + + +#endif // __VDU_H