Minor updates:
[monitor.git] / statistics / operator_overhead.r
1 source("functions.r");
2
# Rolling quartiles of log ticket-resolution time over weekly windows.
#
# Weekly break points are generated between `from` and `to`.  For each
# position i the tickets whose `start` lies in (break[i], break[i + window]]
# are selected, their resolution time (lastreply - start, converted from
# seconds to hours) is log-transformed, and quantile() is taken of it.
# Windows holding one ticket or none are skipped.
#
# Args:
#   t      : data frame with numeric columns `start` and `lastreply`, on the
#            same scale as unclass(as.POSIXct(...)) (seconds).
#   from,to: range limits, anything as.Date() accepts.
#   window : window width, in number of weekly breaks.
#   fmt    : format() string used to label each row's centre date.
#
# Returns:
#   Character matrix (cbind coerces because of the label column) with one
#   row per retained window: column 1 (`xx`) is the break at the window
#   centre round(i + window/2), columns 2-6 are the quantile() values
#   (0%, 25%, 50%, 75%, 100%) of log-hours, column 7 (`m`) is the label.
#   An empty (length 0) result is returned when no window qualifies.
median_time_to_resolve_window <- function (t, from, to, window, fmt="%b")
{
    dates   <- seq(as.Date(from), as.Date(to), 'week')
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # Number of window positions.  Clamped at zero: seq(1, n) with n < 1
    # counts backwards (1, 0, -1, ...) and the resulting zero/negative
    # indices into hbreaks could select tickets and fabricate a row.
    n_rounds <- max(0, floor(length(hbreaks) - window - 1))

    centres   <- vector("list", n_rounds)
    quartiles <- vector("list", n_rounds)

    for ( i in seq_len(n_rounds) )
    {
        print (sprintf("round %s of %s", i, n_rounds))

        # tickets opened inside the half-open window (lower, upper]
        in_window <- which(t$start > hbreaks[i] & t$start <= hbreaks[i + window])
        t_sub <- t[in_window, ]
        if ( nrow(t_sub) <= 1 ) { next }

        # resolution time: seconds -> hours -> log(hours)
        d <- log((t_sub$lastreply - t_sub$start) / (60*60))
        print (sprintf("length: %s", length(d)))
        q <- quantile(d)
        print(q)

        # list slots of skipped windows stay NULL and vanish when combined
        centres[[i]]   <- round(i + window/2)
        quartiles[[i]] <- q
    }

    date_index <- unlist(centres)
    # names xx / q_list / m are kept: cbind derives column names from them
    xx     <- hbreaks[date_index]
    q_list <- do.call(rbind, quartiles)
    m      <- months[date_index]
    return (cbind(xx, q_list, m))
}
41
# Count node-status records in state 'BOOT' per time interval.
#
# Break points are generated between `from` and `to` in steps of `type`
# (the `by` argument of seq() on Dates, e.g. 'day' or 'week').  For each
# pair of consecutive breaks the rows of `ns` with
# break[i] < date <= break[i+1] and status == 'BOOT' are counted.
#
# Args:
#   ns     : data frame with a numeric `date` column (same scale as
#            unclass(as.POSIXct(...))) and a `status` column.
#   from,to: range limits, anything as.Date() accepts.
#   type   : step between breaks, passed to seq().
#   fmt    : format() string used to label each interval's start date.
#
# Returns:
#   Character matrix with rows `xx` (interval start), `yy` (count) and
#   `m` (label), one column per interval.  A range that yields a single
#   break has no intervals and gives an empty (length 0) result.
available_nodes <- function (ns, from, to, type, fmt="%b")
{
    dates   <- seq(as.Date(from), as.Date(to), type)
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # One interval per pair of consecutive breaks.  seq_len() yields an
    # empty index for a single break, where seq(1, 0) would iterate over
    # 1 and 0 and produce rows of unequal length.
    intervals <- seq_len(length(hbreaks) - 1)

    # sum() over the logical mask counts matching rows; NA comparisons are
    # dropped, as which() did in the row-subsetting form.
    yy <- vapply(intervals, function (i) {
        sum(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT',
            na.rm=TRUE)
    }, integer(1))

    # names xx / yy / m are kept: rbind derives the row names from them
    xx <- hbreaks[intervals]
    m  <- months[intervals]
    return (rbind(xx, yy, m))
}
65
66
67
# Count tickets that were open at some point during each time interval.
#
# Break points are generated between `from` and `to` in steps of `type`
# (the `by` argument of seq() on Dates).  For the interval (lo, hi] formed
# by each pair of consecutive breaks a ticket is counted when
#   - it spans the interval (start <= lo and lastreply > hi), or
#   - it starts inside it   (lo < start <= hi), or
#   - it ends inside it     (lo < lastreply <= hi).
#
# Args:
#   t      : data frame with numeric columns `start` and `lastreply`, on the
#            same scale as unclass(as.POSIXct(...)).
#   from,to: range limits, anything as.Date() accepts.
#   type   : step between breaks, passed to seq().
#   fmt    : format() string used to label each interval's start date.
#
# Returns:
#   Character matrix with rows `xx` (interval start), `yy` (ticket count)
#   and `m` (label), one column per interval.  A range that yields a single
#   break has no intervals and gives an empty (length 0) result.
open_tickets <- function (t, from, to, type, fmt="%b")
{
    dates   <- seq(as.Date(from), as.Date(to), type)
    months  <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # seq_len() yields an empty index for a single break, where seq(1, 0)
    # would iterate over 1 and 0 and produce rows of unequal length.
    intervals <- seq_len(length(hbreaks) - 1)

    yy <- vapply(intervals, function (i) {
        lo <- hbreaks[i]
        hi <- hbreaks[i+1]
        # `<=` on the spanning test: a ticket opened exactly on the lower
        # break and still open past the upper one covers the whole interval
        # and would otherwise match none of the three clauses.
        spans     <- t$start <= lo & t$lastreply > hi
        starts_in <- t$start > lo & t$start <= hi
        ends_in   <- t$lastreply > lo & t$lastreply <= hi
        # NA comparisons are dropped, as which() did in the subsetting form
        sum(spans | starts_in | ends_in, na.rm=TRUE)
    }, integer(1))

    # names xx / yy / m are kept: rbind derives the row names from them
    xx <- hbreaks[intervals]
    m  <- months[intervals]
    return (rbind(xx, yy, m))
}
101
# Per-timestamp node counts.
#
# For every distinct value of fb$timestamp (in order of first appearance)
# this counts the distinct hostnames reported at that timestamp and the
# distinct hostnames whose state is 'BOOT'.
#
# Args:
#   fb : data frame with columns `timestamp`, `hostname` and `state`.
#
# Returns:
#   Matrix with rows `x` (timestamp), `n` (distinct hostnames) and `o`
#   (distinct hostnames in state 'BOOT'), one column per distinct timestamp.
#   Empty input gives an empty (length 0) result.
#   The three row lengths are printed before returning.
online_nodes <- function (fb)
{
    breaks <- unique(fb$timestamp)

    n <- integer(length(breaks))
    o <- integer(length(breaks))

    # seq_along() is empty for empty input, where seq(1, 0) would iterate
    # over 1 and 0 and leave x, n and o with different lengths.
    for ( i in seq_along(breaks) )
    {
        sub  <- fb[which(fb$timestamp == breaks[i]),]
        n[i] <- length(unique(sub$hostname))
        o[i] <- length(unique(sub$hostname[which(sub$state == 'BOOT')]))
    }

    # c() with a leading NULL uses the default method, so x is a plain
    # vector without class attributes, matching what appending the
    # timestamps one at a time onto NULL produced.
    x <- c(NULL, breaks)

    print(length(x))
    print(length(n))
    print(length(o))
    # names x / n / o are kept: rbind derives the row names from them
    return (rbind(x, n, o))
}
123
124 #####
125
# system("parse_rt_data.py 3 > rt_data.csv");

# ---- Input data ----
# Ticket export; only rows with complete == 1 are analysed.
t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
t2 <- t[which(t$complete == 1),]
# Daily open-ticket counts: row 1 = day start, row 2 = count, row 3 = label
ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b")

# ---- Release markers (shared by both panels) ----
# One row per marker.  `visible` markers are drawn as dotted grey lines; the
# others are drawn in white with lty=0 and only provide an x position.
# `label` is the text placed above the marker; NA means no text at all.
release_marks <- data.frame(
    date    = c("2004-04-12", "2004-11-12", "2005-03-01", "2005-06-15",
                "2005-10-01", "2006-05-19", "2007-02-28", "2007-05-01",
                "2007-10-21", "2008-06-01", "2008-08-15", "2009-05-01",
                "2010-02-01"),
    label   = c('Release', '3.0', '3.1', '', '3.2', '3.3', '4.0', NA,
                '4.1', '4.2', NA, '4.3', NA),
    visible = c(FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE,
                TRUE, TRUE, FALSE, TRUE, FALSE),
    stringsAsFactors = FALSE
)

# Draw every release marker on the current panel and label it.
#   height  : passed through to abline_at_date() (from functions.r)
#   label_y : y coordinate of the label text
# The value returned by abline_at_date() is used as the label's x position.
draw_release_marks <- function (height, label_y)
{
    positions <- vector("list", nrow(release_marks))
    for ( i in seq_len(nrow(release_marks)) )
    {
        if ( release_marks$visible[i] ) {
            pos <- abline_at_date(release_marks$date[i], col='grey60', lty=3, height=height)
        } else {
            pos <- abline_at_date(release_marks$date[i], col='white', lty=0, height=height)
        }
        positions[i] <- list(pos)
    }

    labelled <- !is.na(release_marks$label)
    text(x=do.call(c, positions[labelled]),
         y=c(label_y),
         labels=release_marks$label[labelled])
    invisible(NULL)
}

#start_image("rt_operator_overhead.png")
par(mfrow=c(2,1))

# ---- Panel a: open tickets per day ----
par(mai=c(0,1,0.3,0.1))

x1 <- as.numeric(ot[1,])
y1 <- as.numeric(ot[2,])

# lowess_smooth() comes from functions.r; its $x / $y are drawn as the trend
a_ot <- lowess_smooth(x1, y1)

plot(x1, y1, col='grey80', type='l', axes=FALSE,
    ylab="a) Open Tickets (tickets/day)", xlab="Date",
    ylim=c(0,120))
lines(a_ot$x, round(a_ot$y), col='black')
axis(2, las=1)

abline(h=25, lty=3, col='grey80')
abline(h=40, lty=3, col='grey80')

draw_release_marks(height=110, label_y=120)

# ---- Panel b: resolution-time quartiles ----
par(mai=c(1,1,0.1,0.1))

# 7-week rolling window; columns 3, 4, 5 of the result hold the 25%, 50%
# and 75% quantiles of log(hours).  The matrix is character, so every
# column is converted explicitly before it is used as a coordinate.
d <- median_time_to_resolve_window(t2, "2004/1/1", "2010/2/28", 7, "%b")
d_x <- as.numeric(d[,1])

# exp() undoes the log, /24 converts hours to days
plot(d_x, exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
        axes=FALSE, ylim=c(0.01, 15), ylab="b) Resolution Time by", col='black',
        xlim=c(min(x1), max(x1)))
mtext("Quartile (days)", 2, 2)
lines(d_x, exp(as.numeric(d[,4]))/24, lty=1, col='grey50')
lines(d_x, exp(as.numeric(d[,3]))/24, lty=1, col='grey75')
axis(1, labels=ot[3,], at=x1, cex.axis=0.7)
mtext("2004           2005           2006           2007           2008           2009", 1,2)
axis(2, labels=c(0,1,4,7,14), at=c(0,1,4,7,14), las=1)

abline(h=1, lty=3, col='grey80')
abline(h=4, lty=3, col='grey80')
abline(h=7, lty=3, col='grey80')

draw_release_marks(height=14, label_y=15)
250
251 #tstamp_20041112 <-abline_at_date("2004-11-12", col='grey60', lty=2, height=10)
252 #tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2, height=10)
253 #tstamp_20050615 <-abline_at_date("2005-06-15", col='grey60', lty=2, height=10)
254 #tstamp_20051023 <-abline_at_date("2005-10-23", col='grey60', lty=2, height=10)
255 #tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2, height=10)
256 #tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2, height=10)
257 #tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2, height=10)
258 #tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2, height=10)
259 #tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2, height=10)
260 #tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2, height=10)
261 #
262 #
263 #text(x=c( tstamp_20041112+(tstamp_20050301-tstamp_20041112)/2,
264 #        tstamp_20050301+(tstamp_20050615-tstamp_20050301)/2,
265 #        tstamp_20050615+(tstamp_20051023-tstamp_20050615)/2,
266 #        tstamp_20051023+(tstamp_20070101-tstamp_20051023)/2,
267 #        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
268 #        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
269 #        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
270 #     y=c(15),
271 #     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
272
273 #end_image()