clearer names for actions, and infer actions better
[monitor.git] / statistics / node_availability.r
1 source("functions.r");
2
3
# Count 'BOOT' status records per calendar interval.
#
# ns    data frame with a numeric 'date' column (compared against epoch-second
#       break values) and a 'status' column
# from  first break date (anything as.Date() accepts)
# to    last break date
# type  interval width passed to seq() as 'by' (e.g. 'day', 'month')
# fmt   format() string used to label each interval (default: month name)
#
# Returns a 3 x k character matrix (rows 'xx', 'yy', 'm'), one column per
# interval (lower, upper]: xx = interval start in epoch seconds, yy = number of
# rows of ns with status 'BOOT' falling in the interval, m = label of the
# interval start.  When from/to produce fewer than two breaks there are no
# intervals and a 3 x 0 matrix is returned.
available_nodes <- function (ns, from, to, type, fmt="%b")
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))
    n_bins <- length(hbreaks) - 1

    # seq(1, 0) would iterate over c(1, 0); handle "no interval" explicitly.
    if ( n_bins < 1 )
    {
        return (matrix(character(0), nrow=3, ncol=0,
                       dimnames=list(c("xx", "yy", "m"), NULL)))
    }

    bins <- seq_len(n_bins)

    # Only 'BOOT' records are counted; which() also drops NA statuses.
    booted <- ns$date[which(ns$status == 'BOOT')]

    yy <- vapply(bins, function (i) {
        sum(booted > hbreaks[i] & booted <= hbreaks[i+1], na.rm=TRUE)
    }, integer(1))

    # Interval i is described by its lower break, so the last break is unused.
    xx <- hbreaks[bins]
    m <- months[bins]
    return (rbind(xx, yy, m))
}
27
28
29
# Count tickets that were open at some point during each calendar interval.
#
# t     data frame with numeric 'start' and 'lastreply' columns (compared
#       against epoch-second break values)
# from  first break date (anything as.Date() accepts)
# to    last break date
# type  interval width passed to seq() as 'by' (e.g. 'day', 'month')
# fmt   format() string used to label each interval (default: month name)
#
# Returns a 3 x k character matrix (rows 'xx', 'yy', 'm'), one column per
# interval (lower, upper]: xx = interval start in epoch seconds, yy = number of
# matching tickets, m = label of the interval start.  When from/to produce
# fewer than two breaks there are no intervals and a 3 x 0 matrix is returned.
open_tickets <- function (t, from, to, type, fmt="%b")
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))
    n_bins <- length(hbreaks) - 1

    # seq(1, 0) would iterate over c(1, 0); handle "no interval" explicitly.
    if ( n_bins < 1 )
    {
        return (matrix(character(0), nrow=3, ncol=0,
                       dimnames=list(c("xx", "yy", "m"), NULL)))
    }

    bins <- seq_len(n_bins)

    yy <- vapply(bins, function (i) {
        lower <- hbreaks[i]
        upper <- hbreaks[i+1]

        # A ticket belongs to the interval when it
        #   - was opened at or before the lower break and last replied to
        #     after the upper break (it covers the whole interval; '<=' so a
        #     ticket opened exactly on the break is not lost),
        #   - was opened inside the interval, or
        #   - received its last reply inside the interval.
        covers  <- t$start <= lower & t$lastreply > upper
        opened  <- t$start > lower & t$start <= upper
        replied <- t$lastreply > lower & t$lastreply <= upper

        length(which(covers | opened | replied))
    }, integer(1))

    # Interval i is described by its lower break, so the last break is unused.
    xx <- hbreaks[bins]
    m <- months[bins]
    return (rbind(xx, yy, m))
}
63
# Per-timestamp node counts.
#
# fb  data frame with 'timestamp', 'hostname' and 'state' columns
#
# Returns a matrix with one column per distinct timestamp (in order of first
# appearance) and three rows:
#   x  the timestamp
#   n  number of distinct hostnames seen at that timestamp
#   o  number of distinct hostnames whose state is 'BOOT' at that timestamp
# An input without any timestamps yields a 3 x 0 matrix.
# The three vector lengths are printed before returning.
online_nodes <- function (fb)
{
    breaks <- unique(fb$timestamp)
    k <- length(breaks)

    # c() with a leading NULL strips attributes, exactly as appending the
    # timestamps one at a time would.
    x <- c(NULL, breaks)
    n <- integer(k)
    o <- integer(k)

    # seq_len() iterates zero times for empty input (seq(1, 0) would not).
    for (i in seq_len(k))
    {
        sub <- fb[which(fb$timestamp == breaks[i]),]
        n[i] <- length(unique(sub$hostname))
        o[i] <- length(unique(sub$hostname[which(sub$state=='BOOT')]))
    }

    print(length(x))
    print(length(n))
    print(length(o))

    if (k == 0)
    {
        return (matrix(numeric(0), nrow=3, ncol=0,
                       dimnames=list(c("x", "n", "o"), NULL)))
    }
    return (rbind(x,n,o))
}
85
# LOWESS smoother with this script's defaults: a span (f) of 2% of the points
# and a delta of 60*60*24 (one day when x is in seconds).
# Returns whatever lowess() returns: a list with components x and y.
lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
{
    lowess(x, y, f=f, delta=delta)
}
91
92 #####
93
# Daily 'BOOT' record counts for 2009-06-10 .. 2010-02-28.
ns <- read.csv(file='node-status-jun09-feb10.csv', header=TRUE, sep=',')
an <- available_nodes(ns, from="2009-06-10", to="2010-02-28", type='day')

# Keep only the days whose count (row 2) exceeds 100; row 1 is the day's
# timestamp.  Both vectors stay character because 'an' is a character matrix.
an_x <- an[1, which(as.numeric(an[2, ]) > 100)]
an_y <- an[2, which(as.numeric(an[2, ]) > 100)]

####
# NOTE(review): the loads below are commented out, so fb7, fb8 and fb9 must
# already exist in the workspace when this script runs -- confirm.
#fb7 <- read.csv('findbad_raw_2007.csv', sep=',', header=TRUE)
#fb8 <- read.csv('findbad_raw_2008.csv', sep=',', header=TRUE)
#fb9 <- read.csv('findbad_raw_2009.csv', sep=',', header=TRUE)
#fball <- rbind(fb7,fb8,fb9)

z7 <- online_nodes(fb7)
z8 <- online_nodes(fb8)
z9 <- online_nodes(fb9)

# Concatenate the three results row by row:
# row 1 = timestamps, row 2 = distinct hosts, row 3 = distinct 'BOOT' hosts.
zx       <- c(z7[1, ], z8[1, ], z9[1, ])
zy_reg   <- c(z7[2, ], z8[2, ], z9[2, ])
zy_avail <- c(z7[3, ], z8[3, ], z9[3, ])
113
# ---- Upper panel: node counts over time -----------------------------------
# NOTE(review): x1 (used for xlim) and ot (used at the end of this section)
# are not defined anywhere in this file, and start_image(), tstamp() and
# abline_at_date() are not defined here either -- presumably they come from
# functions.r or the calling workspace; confirm before running standalone.
start_image("node_availability.png")
par(mfrow=c(2, 1))
par(mai=c(0.1, 1, 0.1, 0.1))

# Smoothed count of distinct hosts per timestamp.
a_reg <- lowess_smooth(zx, zy_reg)
plot(a_reg$x, a_reg$y,
     type='l', pch='.', axes=FALSE,   # FALSE, not F: F is an ordinary variable
     xlim=c(min(x1[length(x1)/2]), max(x1)),
     ylim=c(0, 700),
     xlab="", ylab="Online Node Count")

# Raw samples with more than 330 'BOOT' hosts.
sx <- zx[which(zy_avail > 330)]
sy <- zy_avail[which(zy_avail > 330)]

# Drop hand-picked sample positions: first 2038..2060, then (in the shortened
# vectors) 1700 and 1708.  These indices are tied to the input data set --
# presumably outliers; confirm if the inputs ever change.
sx <- c(sx[1:2037], sx[2061:length(sx)])
sy <- c(sy[1:2037], sy[2061:length(sy)])

sx <- c(sx[1:1699], sx[1701:1707], sx[1709:length(sx)])
sy <- c(sy[1:1699], sy[1701:1707], sy[1709:length(sy)])

# Raw series in light grey ...
lines(sx, sy, col='grey80', pch='.')
lines(an_x, an_y, col='grey80', pch='.')

# ... and their smoothed versions in red.
a_avail <- lowess_smooth(zx, zy_avail)
lines(a_avail$x, a_avail$y, col='red', pch='.')

a_avail_m3 <- lowess_smooth(an_x, an_y)
lines(a_avail_m3$x, a_avail_m3$y, col='red', pch='.')

axis(2, las=1)

# Four hand-entered (date, count) points joined by a grey line.
x_online_node_list <- c(tstamp("2004-6-1"), tstamp("2005-6-1"),
                        tstamp("2006-6-1"), tstamp("2007-11-1"))
y_online_node_list <- c(330, 480, 500, 550)
lines(x_online_node_list, y_online_node_list, col='grey80')

#abline_at_date('2005-01-01', 'grey60')
#abline_at_date('2006-01-01', 'grey60')
#abline_at_date('2007-01-01', 'grey60')
#abline_at_date('2008-01-01', 'grey60')
#abline_at_date('2009-01-01', 'grey60')
#abline_at_date('2010-01-01', 'grey60')

# Dashed vertical markers; the value returned for each date is reused below
# as an x coordinate (presumably the date's timestamp -- confirm in
# functions.r).  The last marker is drawn in white.
tstamp_20041112 <- abline_at_date("2004-11-12", col='grey60', lty=2)
tstamp_20050301 <- abline_at_date("2005-03-01", col='grey60', lty=2)
tstamp_20050615 <- abline_at_date("2005-06-15", col='grey60', lty=2)
tstamp_20051023 <- abline_at_date("2005-10-23", col='grey60', lty=2)
tstamp_20070101 <- abline_at_date("2007-01-01", col='grey60', lty=2)
tstamp_20070501 <- abline_at_date("2007-05-01", col='grey60', lty=2)
tstamp_20080601 <- abline_at_date("2008-06-01", col='grey60', lty=2)
tstamp_20080815 <- abline_at_date("2008-08-15", col='grey60', lty=2)
tstamp_20090501 <- abline_at_date("2009-05-01", col='grey60', lty=2)
tstamp_20100201 <- abline_at_date("2010-02-01", col='white', lty=2)

# One label centred between each listed pair of markers, at the top of the
# panel (y = 700).
text(x=c(tstamp_20041112 + (tstamp_20050301 - tstamp_20041112)/2,
         tstamp_20050301 + (tstamp_20050615 - tstamp_20050301)/2,
         tstamp_20050615 + (tstamp_20051023 - tstamp_20050615)/2,
         tstamp_20051023 + (tstamp_20070101 - tstamp_20051023)/2,
         tstamp_20070101 + (tstamp_20070501 - tstamp_20070101)/2,
         tstamp_20080601 + (tstamp_20080815 - tstamp_20080601)/2,
         tstamp_20090501 + (tstamp_20100201 - tstamp_20090501)/2),
     y=c(700),
     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3'))

l <- length(ot[3, ])
#axis(1, labels=ot[3,l/2:l], at=ot[1,l/2:l], cex.axis=0.7)
#axis(2, las=1)
#mtext("2004           2005           2006           2007           2008           2009", 1,2)
180
# Daily uptime quantiles.
#
# uh    data frame with a numeric 'date' column (compared against epoch-second
#       break values) and an 'uptime' column
# from  first day (anything as.Date() accepts)
# to    last day
#
# For every day (lower, upper] between from and to that has more than one
# uptime sample, quantile() of the samples is computed.  Progress and summary
# values are printed for each day.
#
# Returns cbind(xx, q_list, m): one row per retained day with the day's start
# in epoch seconds, the quantile() values (0%, 25%, 50%, 75%, 100% by
# default) and the day's month label.  Because of the label column the matrix
# is character.
uptime_nodes_m3 <- function (uh, from, to)
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), 'day')
    months <- format(dates, '%b')
    hbreaks <- unclass(as.POSIXct(dates))
    n_bins <- length(hbreaks) - 1

    xx <- NULL
    date_index <- NULL
    q_list <- NULL

    print(length(hbreaks))

    # seq_len() runs zero rounds when there is only one break; seq(1, 0)
    # would run rounds 1 and 0.
    for ( i in seq_len(n_bins) )
    {
        print (sprintf("round %s of %s", i, n_bins))
        # get range from uh
        print (sprintf("ts %s ", hbreaks[i] ))
        uh_sub <- uh[which(uh$date > hbreaks[i] & uh$date <= hbreaks[i+1] ),]

        # days with fewer than two samples are skipped
        if ( length(uh_sub$uptime ) <= 1 )  { next }

        d <- uh_sub$uptime

        print (sprintf("min: %s, median: %s, max: %s", min(d), median(d), max(d)))

        print (sprintf("length: %s", length(d)))
        q <- quantile(d)
        print(q)

        # remember which days were kept so the labels line up with the rows
        date_index <- c(date_index, i)

        xx <- c(xx, hbreaks[i])
        q_list <- rbind(q_list, q)
    }
    m <- months[date_index]
    return (cbind(xx,q_list, m))
}
222
# ---- Lower panel: daily uptime quantiles -----------------------------------
# NOTE(review): x1 and ot are not defined anywhere in this file, and
# abline_at_date() and end_image() are not defined here either -- presumably
# they come from functions.r or the calling workspace; confirm.
uh <- read.csv('node_uptime_history.csv', header=TRUE, sep=',')


dm <- uptime_nodes_m3(uh, "2009-06-10", "2010-02-28")

# dm columns: 1 = day start, 2..6 = quantile() values (0%, 25%, 50%, 75%,
# 100%), 7 = month label.  Uptimes are divided by 60*60*24 to plot days.
# NOTE(review): ylim spans the 0% .. 75% columns, so the 100% line drawn
# below can leave the plotting area -- confirm this is intended.
par(mai=c(1, 1, 0.1, 0.1))
plot(dm[,1], as.numeric(dm[,5])/(60*60*24),
     type='l', lty=1, col='orange',
     axes=FALSE,                      # FALSE, not F: F is an ordinary variable
     xlim=c(min(x1[length(x1)/2]), max(x1)),
     ylim=c(min(as.numeric(dm[,2])/(60*60*24)),
            max(as.numeric(dm[,5])/(60*60*24))),
     xlab="", ylab="Uptime (days)")
lines(dm[,1], as.numeric(dm[,4])/(60*60*24), lty=1, col='red')
lines(dm[,1], as.numeric(dm[,3])/(60*60*24), lty=1, col='black')
lines(dm[,1], as.numeric(dm[,6])/(60*60*24), lty=1, col='orange')
lines(dm[,1], as.numeric(dm[,2])/(60*60*24), lty=1, col='blue')
#axis(1, labels=dm[,7], at=dm[,1])
#axis(2, las=1)
#m<-round(max(as.numeric(dm[,4])/(60*60*24)), 2)
#axis(2, labels=m, at=m, las=1)
#abline(h=m, lty=2, col='grey40')

# x axis: labels from row 3 of ot placed at the positions in row 1, using
# only the second half of its columns.
l <- length(ot[3, ])
l2 <- l/2
axis(1, labels=ot[3, l2:l], at=ot[1, l2:l], cex.axis=0.7)
axis(2, las=1)
mtext("2007                                    2008                                    2009", 1,2)

# Dashed vertical markers; the value returned for each date is reused below
# as an x coordinate (presumably the date's timestamp -- confirm in
# functions.r).  The last marker is drawn in white.
tstamp_20041112 <- abline_at_date("2004-11-12", col='grey60', lty=2)
tstamp_20050301 <- abline_at_date("2005-03-01", col='grey60', lty=2)
tstamp_20050615 <- abline_at_date("2005-06-15", col='grey60', lty=2)
tstamp_20051023 <- abline_at_date("2005-10-23", col='grey60', lty=2)
tstamp_20070101 <- abline_at_date("2007-01-01", col='grey60', lty=2)
tstamp_20070501 <- abline_at_date("2007-05-01", col='grey60', lty=2)
tstamp_20080601 <- abline_at_date("2008-06-01", col='grey60', lty=2)
tstamp_20080815 <- abline_at_date("2008-08-15", col='grey60', lty=2)
tstamp_20090501 <- abline_at_date("2009-05-01", col='grey60', lty=2)
tstamp_20100201 <- abline_at_date("2010-02-01", col='white', lty=2)

# One label centred between each listed pair of markers, at y = 120.
text(x=c(tstamp_20041112 + (tstamp_20050301 - tstamp_20041112)/2,
         tstamp_20050301 + (tstamp_20050615 - tstamp_20050301)/2,
         tstamp_20050615 + (tstamp_20051023 - tstamp_20050615)/2,
         tstamp_20051023 + (tstamp_20070101 - tstamp_20051023)/2,
         tstamp_20070101 + (tstamp_20070501 - tstamp_20070101)/2,
         tstamp_20080601 + (tstamp_20080815 - tstamp_20080601)/2,
         tstamp_20090501 + (tstamp_20100201 - tstamp_20090501)/2),
     y=c(120),
     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3'))

end_image()