66583668a28f09b7a595bca5c61806d87d5a43f8
[monitor.git] / statistics / operator_overhead.r
1 source("functions.r");
2
# Quartiles of log(resolution time) over a sliding window of weeks.
#
# Weekly break points are generated between `from` and `to`.  For every
# window of `window` consecutive weeks the tickets whose `start` falls in
# (hbreaks[i], hbreaks[i+window]] are selected and quantile() is applied to
# log((lastreply - start) / 3600), i.e. log(hours) given that `start` and
# `lastreply` are compared against epoch seconds.
#
# Arguments:
#   t       data frame with numeric columns `start` and `lastreply`
#   from,to range limits, anything as.Date() accepts
#   window  window width, in weekly break points
#   fmt     format() string used to label each window centre
#
# Returns a character matrix (the label column forces the coercion) with one
# row per window that held more than one ticket and the columns
#   xx, 0%, 25%, 50%, 75%, 100%, m
# where xx is the break point at the window centre and m its label.
# When no window qualifies the result has zero rows.
median_time_to_resolve_window <- function (t, from, to, window, fmt="%b")
{
    # weekly break points, as seconds since the epoch
    dates   <- seq(as.Date(from), as.Date(to), 'week')
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # Number of windows to evaluate.  Clamped at zero so that a range too
    # short for a single window produces an empty loop; seq(1, n) would
    # count *down* (1, 0, -1, ...) for n < 1.
    rounds <- max(0, length(hbreaks) - window - 1)

    # Index of the break point at the centre of each window, rounded half
    # up.  round() rounds halves to even, which for an odd `window` gave
    # centres 4, 6, 6, 8, 8, ... (duplicated and skipped x positions).
    centre <- floor(seq_len(rounds) + window/2 + 0.5)

    quartiles <- vector("list", rounds)
    for ( i in seq_len(rounds) )
    {
        print (sprintf("round %s of %s", i, rounds))
        # tickets started inside this window
        t_sub <- t[which(t$start > hbreaks[i] & t$start <= hbreaks[i+window]),]
        # windows with fewer than two tickets are skipped
        if ( length(t_sub$start) <= 1 )  { next }

        d <- (t_sub$lastreply - t_sub$start)/(60*60)    # hours
        d <- log(d)                                     # log(hours)
        print (sprintf("length: %s", length(d)))
        q <- quantile(d)
        print(q)

        quartiles[[i]] <- q
    }

    # keep only the windows that were not skipped
    kept       <- !vapply(quartiles, is.null, logical(1))
    date_index <- centre[kept]
    xx         <- hbreaks[date_index]
    q_list     <- do.call(rbind, quartiles[kept])
    m          <- months[date_index]
    return (cbind(xx, q_list, m))
}
41
# Count 'BOOT' records per interval.
#
# Break points are generated between `from` and `to` with step `type`
# (passed to seq() as the `by` argument, e.g. 'day' or 'week').  For each
# interval (hbreaks[i], hbreaks[i+1]] the rows of `ns` whose `date` lies in
# the interval and whose `status` equals 'BOOT' are counted.
#
# Arguments:
#   ns      data frame with columns `date` (compared against epoch seconds)
#           and `status`
#   from,to range limits, anything as.Date() accepts
#   type    step between break points
#   fmt     format() string used to label each interval start
#
# Returns a 3-row character matrix with rows xx (interval start), yy (count)
# and m (label), one column per interval.  A range that holds a single break
# point has no intervals and yields zero columns.
available_nodes <- function (ns, from, to, type, fmt="%b")
{
    dates   <- seq(as.Date(from), as.Date(to), type)
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # one interval per pair of consecutive breaks; seq_len() is empty for a
    # single break, where seq(1, 0) would iterate over 1 and 0
    intervals <- seq_len(length(hbreaks) - 1)
    booted    <- ns$status == 'BOOT'

    yy <- vapply(intervals, function (i)
    {
        # NA comparisons are not counted, matching which()
        sum(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & booted,
            na.rm=TRUE)
    }, integer(1))

    xx <- hbreaks[intervals]
    m  <- months[intervals]
    return (rbind(xx, yy, m))
}
65
66
67
# Count tickets that were open at some point during each interval.
#
# Break points are generated between `from` and `to` with step `type`
# (passed to seq() as the `by` argument).  A ticket is counted for the
# interval (hbreaks[i], hbreaks[i+1]] when
#   - it started at or before the interval and its last reply is after it, or
#   - its start lies inside the interval, or
#   - its last reply lies inside the interval.
#
# Arguments:
#   t       data frame with numeric columns `start` and `lastreply`
#           (compared against epoch seconds)
#   from,to range limits, anything as.Date() accepts
#   type    step between break points
#   fmt     format() string used to label each interval start
#
# Returns a 3-row character matrix with rows xx (interval start), yy (count)
# and m (label), one column per interval.  A range that holds a single break
# point has no intervals and yields zero columns.
open_tickets <- function (t, from, to, type, fmt="%b")
{
    dates   <- seq(as.Date(from), as.Date(to), type)
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # one interval per pair of consecutive breaks; seq_len() is empty for a
    # single break, where seq(1, 0) would iterate over 1 and 0
    intervals <- seq_len(length(hbreaks) - 1)

    yy <- vapply(intervals, function (i)
    {
        lo <- hbreaks[i]
        hi <- hbreaks[i+1]
        # `<=` in the spanning clause: a ticket opened exactly on the lower
        # break and still open past the upper break matched none of the
        # three clauses when the comparison was strict.
        spans  <- t$start <= lo & t$lastreply > hi
        starts <- t$start > lo & t$start <= hi
        ends   <- t$lastreply > lo & t$lastreply <= hi
        # NA comparisons are not counted, matching which()
        sum(spans | starts | ends, na.rm=TRUE)
    }, integer(1))

    xx <- hbreaks[intervals]
    m  <- months[intervals]
    return (rbind(xx, yy, m))
}
101
# Per-timestamp node counts.
#
# For every distinct value of fb$timestamp, counts the distinct hostnames
# reported at that timestamp and the distinct hostnames whose `state` is
# 'BOOT'.
#
# Arguments:
#   fb   data frame with columns `timestamp`, `hostname` and `state`
#
# Returns a 3-row matrix with rows x (timestamp), n (hostnames seen) and
# o (hostnames in state 'BOOT'), one column per distinct timestamp.  The
# three row lengths are printed before returning.  Empty input yields zero
# columns.
online_nodes <- function (fb)
{
    breaks <- unique(fb$timestamp)

    # seq_along() is empty for empty input, where seq(1, 0) would iterate
    # over 1 and 0
    idx <- seq_along(breaks)

    # hostnames reported at the i-th distinct timestamp, optionally
    # restricted to those in state 'BOOT'
    hosts_at <- function (i, booted_only)
    {
        sub <- fb[which(fb$timestamp == breaks[i]),]
        if ( booted_only ) {
            return (unique(sub$hostname[which(sub$state=='BOOT')]))
        }
        unique(sub$hostname)
    }

    x <- breaks
    n <- vapply(idx, function (i) length(hosts_at(i, FALSE)), integer(1))
    o <- vapply(idx, function (i) length(hosts_at(i, TRUE)), integer(1))

    print(length(x))
    print(length(n))
    print(length(o))
    return (rbind(x,n,o))
}
123
# Thin wrapper around lowess() with this script's defaults: a small
# smoother span (f) and a delta of 60*60*24.
# Returns lowess()'s result, a list with components x and y.
lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
{
    lowess(x, y, f=f, delta=delta)
}
129
130 #####
131
# ---- Figure: rt_operator_overhead.png ----
#
# Two stacked panels sharing one time axis:
#   a) open tickets per day, raw (grey) and lowess-smoothed (black)
#   b) ticket resolution time quartiles over a sliding 7-week window
#
# start_image(), end_image() and abline_at_date() are not defined in this
# file; presumably they come from functions.r -- verify there.

# system("parse_rt_data.py 3 > rt_data.csv");
t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
t2 <- t[which(t$complete == 1),]
ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b")

# Vertical markers drawn on both panels, in drawing order.  Rows drawn with
# col='white', lty=0 are kept so that abline_at_date() is called exactly as
# before.  `label` is the text placed above the marker; NA means the marker
# gets no text() entry at all.
release_marks <- data.frame(
    date  = c("2004-04-12", "2004-11-12", "2005-03-01", "2005-06-15",
              "2005-10-01", "2006-05-19", "2007-02-28", "2007-05-01",
              "2007-10-21", "2008-06-01", "2008-08-15", "2009-05-01",
              "2010-02-01"),
    col   = c('white',  'grey60', 'grey60', 'white',
              'grey60', 'grey60', 'grey60', 'white',
              'grey60', 'grey60', 'white',  'grey60',
              'white'),
    lty   = c(0, 3, 3, 0,
              3, 3, 3, 0,
              3, 3, 0, 3,
              0),
    label = c('Release', '3.0', '3.1', '',
              '3.2', '3.3', '4.0', NA,
              '4.1', '4.2', NA, '4.3',
              NA),
    stringsAsFactors = FALSE)

# Draw every release marker on the current panel with the given `height`
# and write the labels at y = label_y.  The value returned by
# abline_at_date() is used as the x position of the label.
mark_releases <- function (height, label_y)
{
    stamps <- lapply(seq_len(nrow(release_marks)), function (i)
    {
        abline_at_date(release_marks$date[i], col=release_marks$col[i],
                       lty=release_marks$lty[i], height=height)
    })
    labelled <- !is.na(release_marks$label)
    text(x=do.call(c, stamps[labelled]),
         y=c(label_y),
         labels=release_marks$label[labelled])
    invisible(NULL)
}

start_image("rt_operator_overhead.png")
par(mfrow=c(2,1))

# ---- panel a) open tickets ----
par(mai=c(0,1,0.3,0.1))

x1 <- as.numeric(ot[1,])
y1 <- as.numeric(ot[2,])

a_ot <- lowess_smooth(x1, y1)

plot(x1, y1, col='grey80', type='l', axes=FALSE,
    ylab="a) Open Tickets (tickets/day)", xlab="Date",
    ylim=c(0,120))
lines(a_ot$x, round(a_ot$y), col='black')

# the x axis is drawn on the lower panel only
axis(2, las=1)

abline(h=25, lty=3, col='grey80')
abline(h=40, lty=3, col='grey80')

mark_releases(height=110, label_y=120)

# ---- panel b) resolution time quartiles ----
par(mai=c(1,1,0.1,0.1))
for ( s in c(7) )       # window width, in weeks
{
    d <- median_time_to_resolve_window(t2, "2004/1/1", "2010/2/28", s, "%b")
    # d is a character matrix: column 1 is the window centre, columns 3, 4
    # and 5 are the 25%, 50% and 75% quantiles of log(hours);
    # exp(.)/24 converts them to days.
    when <- as.numeric(d[,1])
    plot(when, exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
            axes=FALSE, ylim=c(0.01, 15), ylab="b) Resolution Time by", col='black',
            xlim=c(min(x1), max(x1)))
    mtext("Quartile (days)", 2, 2)
    lines(when, exp(as.numeric(d[,4]))/24, lty=1, col='grey50')
    lines(when, exp(as.numeric(d[,3]))/24, lty=1, col='grey75')
    axis(1, labels=ot[3,], at=ot[1,], cex.axis=0.7)
    mtext("2004           2005           2006           2007           2008           2009", 1,2)
    axis(2, labels=c(0,1,4,7,14), at=c(0,1,4,7,14), las=1)
}

abline(h=1, lty=3, col='grey80')
abline(h=4, lty=3, col='grey80')
abline(h=7, lty=3, col='grey80')

mark_releases(height=14, label_y=15)

end_image()