add new scripts
[monitor.git] / statistics / prep.r
1
2 source("functions.r");
3
# Kernel version vs. uptime overview: read the per-node export and plot
# uptime (scaled by 86400) grouped by kernel version.
ikern <- read.csv("/Users/soltesz/Downloads/out.csv", header = TRUE, sep = ",")

# Encode every distinct kernel version as a factor level; the labels are the
# positions 1..k of the sorted unique version values.
kernel_levels <- sort(unique(ikern$kernel_version))
f <- factor(ikern$kernel_version,
            levels = kernel_levels,
            labels = seq_along(kernel_levels))

# NOTE(review): assumes `uptime` is recorded in seconds, so this is days --
# confirm against the exporter.
u <- ikern$uptime / (60 * 60 * 24)

# Whole seconds since the epoch. as.numeric(Sys.time()) is used instead of
# format(Sys.time(), "%s") because the "%s" conversion is a platform-specific
# extension and is not available everywhere.
current_time <- floor(as.numeric(Sys.time()))
# NOTE(review): assumes `install_date` is an epoch timestamp in seconds, making
# `i` the age of the install in days -- confirm.
i <- (current_time - ikern$install_date) / (60 * 60 * 24)

# With a factor on the x axis, plot() draws one box per factor level.
plot(f, u)
13
14
# Per-site export: keep the status column as a factor (labels are the
# positions 1..k of the sorted unique status values) and pull out the
# sliver counts.
sites <- read.csv(
  "/Users/soltesz/Downloads/sites.csv",
  header = TRUE,
  sep = ","
)
f <- factor(
  sites$status,
  levels = sort(unique(sites$status)),
  labels = seq_len(length(unique(sites$status)))
)

s <- sites$sliver_count
19
# Per-node resource export, explored with a 3-D cloud and a scatterplot matrix.
res <- read.csv(
  "/Users/soltesz/Downloads/out_resources.csv",
  header = TRUE,
  sep = ","
)
library(lattice)

# memsize against disksize and cpuspeed, conditioned on numcores.
# Lattice functions return a trellis object that is only drawn when it is
# printed; the explicit print() makes the plot appear when this script is run
# through source() as well as when it is pasted at the console.
print(cloud(memsize ~ disksize * cpuspeed | numcores, data = res))

# Scatterplot matrix of columns 2, 4 and 5 of the export.
x <- res[c(2, 4, 5)]
pairs(x)
26
27
28
# Exploratory views of the rounded slices() value computed from the resource
# export: first separate plots, then a histogram with the raw samples
# overlaid, then the two views with hand-tuned margins.
mdrc <- read.csv(
  "/Users/soltesz/Downloads/out_resources.csv",
  header = TRUE,
  sep = ","
)

stripchart(round(slices(mdrc)), method = "jitter")
hist(round(slices(mdrc)), breaks = 30)

# Histogram clipped to 0..32 with the jittered raw samples drawn on top.
hist(round(slices(mdrc)), breaks = 30, xlim = c(0, 32))
stripchart(
  round(slices(mdrc)),
  method = "jitter",
  add = TRUE,
  jitter = 30,
  at = 50
)


# Margins are given in inches as: bottom, left, top, right
par(mai = c(0, 1, 0.5, 0.2))
hist(round(slices(mdrc)), breaks = 30, xlim = c(0, 32))
par(mai = c(1.0, 1, 0.5, 0.2))
# Samples are shifted by -0.5 before plotting.
stripchart(
  round(slices(mdrc)) - 0.5,
  method = "jitter",
  jitter = 20,
  xlim = c(0, 32),
  ylim = c(-25, 25),
  ylab = "Raw Samples",
  xlab = "Slice count as a function of Mem, CPU, Disk"
)
43
44
# Write the two-panel figure (histogram on top, jittered raw samples below)
# to a PNG file.
png("/Users/soltesz/Downloads/slices.png")
par(mfrow = c(2, 1))

# Top panel: no bottom margin so it sits directly above the strip chart.
par(mai = c(0, 1, 0.5, 0.2))
hist(
  round(slices(mdrc)),
  breaks = 30,
  xlim = c(0, 32),
  main = "Distribution of Slice Count as Function of Mem, CPU, Disk"
)

# Bottom panel: restore the bottom margin so the x label fits.
par(mai = c(1.0, 1, 0.5, 0.2))
stripchart(
  round(slices(mdrc)) - 0.5,
  method = "jitter",
  jitter = 20,
  xlim = c(0, 32),
  ylim = c(-25, 25),
  ylab = "Raw Samples",
  xlab = "Slice count as a function of Mem, CPU, Disk for live Planetlab Machines"
)
dev.off()
52
53
54 #-----------------------
55
# Pick the scoring function. The second assignment wins, so slices_2 is the
# one in effect; the first line is kept so the choice can be flipped by
# commenting one of them out.
f <- slices
f <- slices_2

# Score every row of the resource table and keep the result alongside it.
# NOTE(review): the second argument looks like a switch between a total score
# (FALSE) and a per-component breakdown (TRUE) -- confirm in functions.r.
s2 <- f(mdrc, FALSE)
mdrc$score <- s2
df <- data.frame(mdrc)

# Number of breaks requested from hist() in the sections below.
b <- 30
63
64 # ----------------------
### LOGINBASE
# Sum each row's score and its per-resource components by loginbase.
#
# Rows are mapped to an accumulator slot with match() against the sorted
# unique loginbase values. Subscripting directly with r$loginbase only hits
# the intended slot when the column is a factor; since R 4.0.0 read.csv()
# returns character columns by default, and a character subscript on these
# unnamed arrays does not address the intended slot.
lb_keys <- as.character(mdrc$loginbase)
lb_levels <- sort(unique(lb_keys))
lb_index <- match(lb_keys, lb_levels)

unique_loginbase_length <- length(unique(mdrc$loginbase))
unique_lb <- list(
  loginbase = array(0, c(unique_loginbase_length)),
  score     = array(0, c(unique_loginbase_length)),
  memsize   = array(0, c(unique_loginbase_length)),
  disksize  = array(0, c(unique_loginbase_length)),
  cpuspeed  = array(0, c(unique_loginbase_length))
)

# seq_len() keeps the loop from running when the table has no rows
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_len(nrow(mdrc))) {
  k <- lb_index[i]
  if (is.na(k)) {
    next # row has no loginbase to aggregate under
  }
  r <- mdrc[i, ]
  # NOTE(review): f(r, TRUE) is consumed as a vector whose elements 1, 2, 3
  # are the memory, disk and CPU parts for this row -- confirm in functions.r.
  v <- f(r, TRUE)

  # The loginbase column stores the slot number itself; later code uses it
  # to subscript the accumulators again.
  unique_lb$loginbase[k] <- k
  unique_lb$score[k]     <- unique_lb$score[k] + r$score
  unique_lb$memsize[k]   <- unique_lb$memsize[k] + v[1]
  unique_lb$disksize[k]  <- unique_lb$disksize[k] + v[2]
  unique_lb$cpuspeed[k]  <- unique_lb$cpuspeed[k] + v[3]
}

# From here on `df` is the per-loginbase table (five columns).
df <- data.frame(unique_lb)
93
# Histogram of the scores in `df` (the call also draws the plot).
h <- hist(df$score, breaks = b)

# Accumulators are sized to the larger of the number of breaks and the
# largest break value.
bins <- max(length(h$breaks), max(h$breaks))
c <- array(0, dim = bins)
d <- array(0, dim = bins)
m <- array(0, dim = bins)

# For every row, look up the histogram bin its score falls into and add that
# row's memory, disk and CPU totals to the bin's running sums.
for (i in seq_along(df$cpuspeed)) {
  r <- df[i, ]
  s <- index_of_bin(h, r$score) # bin position for this score

  m[s] <- unique_lb$memsize[r$loginbase] + m[s]
  d[s] <- unique_lb$disksize[r$loginbase] + d[s]
  c[s] <- unique_lb$cpuspeed[r$loginbase] + c[s]
}
112
113 # ----------------------
### HOSTS
# ---  get plot of contributing parts
#
# Rebuild the per-host table first. The LOGINBASE section reassigns `df` to a
# five-column per-loginbase table, so running the script top to bottom made
# df[i, 1:6] below fail with "undefined columns selected" and fed f() the
# aggregated columns instead of the raw per-host ones.
df <- data.frame(mdrc)

# Histogram of the per-host scores (the call also draws the plot).
h <- hist(df$score, breaks = b)
bins <- max(length(h$breaks), max(h$breaks))
c <- array(0, c(bins))
d <- array(0, c(bins))
m <- array(0, c(bins))

# For each host, find the histogram bin its score falls into, then add the
# share of the score contributed by memory, disk and CPU to that bin.
# seq_len() keeps the loop from running on an empty table.
for (i in seq_len(nrow(df))) {
  # NOTE(review): assumes the first six columns hold everything f() needs
  # plus `score` -- confirm against the CSV layout.
  r <- df[i, 1:6]
  s <- index_of_bin(h, r$score) # find bin position...
  # NOTE(review): f(r, TRUE) is consumed as c(mem, disk, cpu) parts of the
  # score -- confirm in functions.r. A score of 0 would make these ratios
  # non-finite.
  v <- f(r, TRUE)
  m[s] <- m[s] + v[1] / r$score
  d[s] <- d[s] + v[2] / r$score
  c[s] <- c[s] + v[3] / r$score
}
134
135
# Per-bin contribution matrix: one row per bin, one column per resource, in
# the same order as the legend below (CPU, disk, memory). The previous
# array(c(c), ...) recycled the CPU vector into all three columns, so the
# stacked bars did not match their legend labels.
a <- array(c(c, d, m), dim = c(bins, 3))

#png("/Users/soltesz/Downloads/slice_policy_1.png")
par(mfrow = c(2, 1))

# Top panel: score histogram counts, with a leading 0 so there is one bar
# per break value.
par(mai = c(0.5, 1, 0.5, 0.2))
barplot(
  c(0, h$counts),
  xlab = "slice count",
  main = "Distribution of Per-node 'Scores' Calculated from Mem/Disk/CPU",
  ylab = "Total Frequency",
  ylim = c(0, 160)
)

# Bottom panel: the same bins, stacked by resource.
# NOTE(review): names.arg needs one label per bar; `a` has `bins` rows, which
# can exceed length(h$breaks) -- confirm the two always agree.
par(mai = c(1.0, 1, 0, 0.2))
barplot(
  t(a),
  legend.text = c("CPUspeed (GHz)", "DISKsize (GB)", "MEMsize (GB)"),
  col = c("pink", "lightblue", "lightgreen"),
  ylim = c(0, 160),
  ylab = "Total with Break-down",
  xlab = "Per-node Score",
  names.arg = h$breaks
)
#dev.off()
157
158
159
160 #a <- list(cpuspeed=c, memsize=m, disksize=d);
161 # barplot(t(a), legend=c("cpuspeed", "disksize", "memsize"), col = c("pink", "lightblue", "lightgreen"), ylab="Total Contribution by CPU, Disk, Mem ")