2 * Experimental data distribution table generator
3 * Taken from the uncopyrighted NISTnet code.
5 * Rread in a series of "random" data values, either
6 * experimentally or generated from some probability distribution.
7 * From this, create the inverse distribution table used to approximate
15 #include <sys/types.h>
20 readdoubles(FILE *fp, int *number)
27 fstat(fileno(fp), &info);
28 if (info.st_size > 0) {
29 limit = 2*info.st_size/sizeof(double); /* @@ approximate */
34 x = calloc(limit, sizeof(double));
36 perror("double alloc");
40 for (i=0; i<limit; ++i){
41 fscanf(fp, "%lf", &x[i]);
51 arraystats(double *x, int limit, double *mu, double *sigma, double *rho)
54 double sumsquare=0.0, sum=0.0, top=0.0;
57 for (i=0; i<limit; ++i){
58 sumsquare += x[i]*x[i];
63 *sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1));
65 for (i=1; i < n; ++i){
66 top += ((double)x[i]- *mu)*((double)x[i-1]- *mu);
67 sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu);
73 /* Create a (normalized) distribution table from a set of observed
74 * values. The table is fixed to run from (as it happens) -4 to +4,
75 * with granularity .00002.
78 #define TABLESIZE 16384/4
79 #define TABLEFACTOR 8192
81 #define MINSHORT -32768
82 #define MAXSHORT 32767
85 /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger
86 * than MAXSHORT, we don't bother looking at a larger domain than this:
88 #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1)
89 #define DISTTABLEGRANULARITY 50000
90 #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2)
93 makedist(double *x, int limit, double mu, double sigma)
96 int i, index, first=DISTTABLESIZE, last=0;
99 table = calloc(DISTTABLESIZE, sizeof(int));
101 perror("table alloc");
105 for (i=0; i < limit; ++i) {
106 /* Normalize value */
107 input = (x[i]-mu)/sigma;
109 index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY);
110 if (index < 0) index = 0;
111 if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1;
121 /* replace an array by its cumulative distribution */
123 cumulativedist(int *table, int limit, int *total)
127 while (--limit >= 0) {
135 inverttable(int *table, int inversesize, int tablesize, int cumulative)
137 int i, inverseindex, inversevalue;
139 double findex, fvalue;
141 inverse = (short *)malloc(inversesize*sizeof(short));
142 for (i=0; i < inversesize; ++i) {
143 inverse[i] = MINSHORT;
145 for (i=0; i < tablesize; ++i) {
146 findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN;
147 fvalue = (double)table[i]/(double)cumulative;
148 inverseindex = (int)rint(fvalue*inversesize);
149 inversevalue = (int)rint(findex*TABLEFACTOR);
150 if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1;
151 if (inversevalue > MAXSHORT) inversevalue = MAXSHORT;
152 inverse[inverseindex] = inversevalue;
158 /* Run simple linear interpolation over the table to fill in missing entries */
160 interpolatetable(short *table, int limit)
162 int i, j, last, lasti = -1;
165 for (i=0; i < limit; ++i) {
166 if (table[i] == MINSHORT) {
167 for (j=i; j < limit; ++j)
168 if (table[j] != MINSHORT)
171 table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti);
173 table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti);
183 printtable(const short *table, int limit)
187 printf("# This is the distribution table for the experimental distribution.\n");
189 for (i=0 ; i < limit; ++i) {
190 printf("%d%c", table[i],
191 (i % 8) == 7 ? '\n' : ' ');
196 main(int argc, char **argv)
200 double mu, sigma, rho;
207 if (!(fp = fopen(argv[1], "r"))) {
214 x = readdoubles(fp, &limit);
216 fprintf(stderr, "Nothing much read!\n");
219 arraystats(x, limit, &mu, &sigma, &rho);
221 fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n",
222 limit, mu, sigma, rho);
225 table = makedist(x, limit, mu, sigma);
227 cumulativedist(table, DISTTABLESIZE, &total);
228 inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total);
229 interpolatetable(inverse, TABLESIZE);
230 printtable(inverse, TABLESIZE);