R routines for printing some statistics
[monitor.git] / statistics / rt_data.r
1
2
3 source("functions.r");
4
5 # system("parse_rt_data.py 3 > rt_data.csv");
6 #t <- read.csv('rt_data.csv', sep=',', header=TRUE)
7 t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
8
9 par(mfrow=c(2,1))
10
11 h<-hist(log(log(t$replies)), breaks=50)
12 lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
13 h<-hist(log(log(log(t$replies))), breaks=50)
14 lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
15
16
17 par(mfrow=c(1,1))
18
19 t2 <- t[which(t$complete == 1),]
20 d <- (t2$lastreply - t2$start)/(60*60)
21
22 #start_image("rt_hist_ttc_1000.png")
23 #hist(d[which(d<1000)], xlab="hours from creation to last reply", breaks=30)
24 #end_image()
25 #
26 #start_image("rt_hist_ttc_200.png")
27 #hist(d[which(d<200)], xlab="hours from creation to last reply", breaks=30)
28 #end_image()
29 #
30 #start_image("rt_hist_ttc_50.png")
31 #hist(d[which(d<50)], xlab="hours from creation to last reply", breaks=30)
32 #end_image()
33 #
34 #start_image("rt_hist_ttc_10.png")
35 #hist(d[which(d<10)], xlab="hours from creation to last reply", breaks=30)
36 #end_image()
37 #
38 #d2 <- (t2$lastreply - t2$start)
39 #h<-hist(log(d2), plot=F, breaks=50)
40 #lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
41
42
43 # this doesn't work as I would like.  I think the bins aren't as I expect
44 #h <- hist(d, plot=F, breaks=c(seq(0,max(d)+1, .1)))
45 #plot(h$counts, log="x", pch=20, col="blue",
46 #       main="Log-normal distribution",
47 #       xlab="Value", ylab="Frequency")
48
49 #plot(log(d2))
50 #plot(ecdf(d2))
51
52 tstamp_45 <-unclass(as.POSIXct("2005-01-01", origin="1960-01-01"))[1]
53 tstamp_56 <-unclass(as.POSIXct("2006-01-01", origin="1960-01-01"))[1]
54 tstamp_67 <-unclass(as.POSIXct("2007-01-01", origin="1960-01-01"))[1]
55 tstamp_78 <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01"))[1]
56 tstamp_89 <-unclass(as.POSIXct("2009-01-01", origin="1960-01-01"))[1]
57 tstamp_90 <-unclass(as.POSIXct("2010-01-01", origin="1960-01-01"))[1]
58
59
60 t_4 <- t2[which( t2$start <  tstamp_45 ),]
61 t_5 <- t2[which( t2$start >= tstamp_45 & t2$start < tstamp_56 ),]
62 t_6 <- t2[which( t2$start >= tstamp_56 & t2$start < tstamp_67 ),]
63 t_7 <- t2[which( t2$start >= tstamp_67 & t2$start < tstamp_78 ),]
64 t_8 <- t2[which( t2$start >= tstamp_78 & t2$start < tstamp_89 ),]
65 t_9 <- t2[which( t2$start >= tstamp_89 & t2$start < tstamp_90 ),]
66 t_10 <- t2[which( t2$start >= tstamp_90 ),]
67
68 par(mfrow=c(4,1))
69 plot_rt_hist(t_4)
70 plot_rt_hist(t_5)
71 plot_rt_hist(t_6)
72 plot_rt_hist(t_7)
73 plot_rt_hist(t_8)
74 plot_rt_hist(t_9)
75 par(mfrow=c(1,1))
76
77 start_image("rt_support_seasonal.png")
78 par(mfrow=c(6,1))
79 par(mai=c(.3,.3,.3,.3))
80
81 # start dates on Sunday to align all weeks with weekend boundaries.
82 year_hist(t_4, "2004", "2003/12/28", "2005/1/7", 85)
83 year_hist(t_5, "2005", "2005/1/2", "2006/1/7", 85)
84 year_hist(t_6, "2006", "2006/1/1", "2007/1/7", 85)
85 year_hist(t_7, "2007", "2006/12/31", "2008/1/7", 85)
86 year_hist(t_8, "2008", "2007/12/30", "2009/1/7", 85)
87 year_hist(t_9, "2009", "2008/12/28", "2010/1/30", 85)
88 end_image()
89
90 par(mai=c(0.7,0.7,0.7,0.7))
91 par(mfrow=c(1,1))
92
93
94 tstamp <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01"))
95 t_67 <- t2[which( t2$start <  tstamp[1] ),]
96 t_89 <- t2[which( t2$start >= tstamp[1] ),]
97
98
99 # install.packages('sn')
100 require(sn)
101 par(mfrow=c(6,1))
102 par(mai=c(0.3,0.3,0.3,0.3))
103
104 #start_image("rt_hist_ttc_1000.png")
105 time_hist <- function (t, lessthan, year, log=T, breaks=30, xlim=c(-4,10), ylim=c(0,150))
106 {
107     d <- (t$lastreply - t$start)/(60*60)
108     main = sprintf("Histogram of d<%s for %s", lessthan, year);
109     if ( log )
110     {
111         d <- log(d[which(d<lessthan)])
112         avg <- round(mean(d), 2)
113         main = sprintf("Histogram of d<%s for %s : %s", lessthan, year, avg);
114         #h<-sn.mle(d, xlab="Hours to Complete Ticket", breaks=30, main=main, xlim=xlim, ylim=ylim)
115         h<-sn.mle(y=d)
116     } else {
117         h<-hist(d[which(d<lessthan)], xlab="Hours to Complete Ticket", breaks=30, main=main, xlim=xlim, ylim=ylim)
118     }
119     return (h);
120 }
121 tstamp <-unclass(as.POSIXct("2007-05-01", origin="1960-01-01"))
122 t_7a <- t_7[which(t_7$start < tstamp),]
123 t_7b <- t_7[which(t_7$start >= tstamp),]
124
125 #end_image()
126 h4<-time_hist(t_4, 10000, "2004")
127 h5<-time_hist(t_5, 10000, "2005")
128 h6<-time_hist(t_6, 10000, "2006")
129 #h7<-time_hist(t_7, 10000, "2007")
130 h7a<-time_hist(t_7a, 10000, "2007")
131 h7b<-time_hist(t_7b, 10000, "2007")
132 h8<-time_hist(t_8, 10000, "2008")
133 h9<-time_hist(t_9, 10000, "2009")
134
135 tstamp <-unclass(as.POSIXct("2009-09-01", origin="1960-01-01"))
136 m_9a <- m_9[which(m_9$start < tstamp),]
137 m_9b <- m_9[which(m_9$start >= tstamp),]
138
139 split_by_time <- function (t, datestr)
140 {
141     tstamp <-unclass(as.POSIXct(datestr, origin="1960-01-01"))
142     a <- t[which(t$start < tstamp),]
143     b <- t[which(t$start >= tstamp),]
144     v<- list('before'=a, 'after'=b)
145     return (v);
146 }
147
148 mh7 <- time_hist(m_7, 10000, '2007')
149
150 sm_8 <- split_by_time(m_8, "2008-07-01")
151 #mh8a <- time_hist(rbind(m_7, m_8$before, m_8$after), 10000, '2008')
152 #mh8a <- time_hist(rbind(m_7[which(log((m_7$lastreply-m_7$start)/(60*60))>2),]), 10000, '2008')
153 # m_7 is junk data
154
155 mh_8 <- time_hist(sm_8$before, 10000, '2008')
156
157 sm_9 <- split_by_time(m_9, "2009-09-01")
158
159 mh_89 <- time_hist(rbind(sm_8$after, sm_9$before), 10000, '2009')
160 mh_9 <- time_hist(sm_9$after, 10000, '2009')
161
162
163 x<-seq(-8,10,0.01)
164 #x<- exp(x)/24
165
166 #my7<-dsn(x, dp=cp.to.dp(mh7$cp))
167 my8<-dsn(x, dp=cp.to.dp(mh_8$cp))
168 my89<-dsn(x, dp=cp.to.dp(mh_89$cp))
169 my9<-dsn(x, dp=cp.to.dp(mh_9$cp))
170
171 y4<-dsn(x, dp=cp.to.dp(h4$cp))
172 y5<-dsn(x, dp=cp.to.dp(h5$cp))
173 y6<-dsn(x, dp=cp.to.dp(h6$cp))
174 y7a<-dsn(x, dp=cp.to.dp(h7a$cp))
175 y7b<-dsn(x, dp=cp.to.dp(h7b$cp))
176 y8<-dsn(x, dp=cp.to.dp(h8$cp))
177 y9<-dsn(x, dp=cp.to.dp(h9$cp))
178
179 start_image("rt_time_to_resolve.png")
180 par(mfrow=c(1,1))
181 par(mai=c(1.0,0.7,0.7,0.7))
182 # monitor
183 plot(x, my9, col='blue', type='l', axes=F, xlab="Days to Resolve", ylab="Density")
184 axis(1, labels=c(0.0001, 0.01, 0.1, 1, 5, 20, 100), at=c(0.0001, 0.01, 0.1, 1, 5, 20, 100))
185 axis(2)
186 lines(x, my8, col='dodgerblue')
187 lines(x, my7, col='turquoise')
188 abline(v=x[which(my8==max(my8))])
189 abline(v=x[which(my9==max(my9))])
190
191 # heavy
192 lines(x, y7a, col='green3')
193 lines(x, y4, col='green4')
194 lines(x, y5, col='greenyellow')
195
196 abline(v=x[which(y4==max(y4))])
197 abline(v=x[which(y5==max(y5))])
198 abline(v=x[which(y7a==max(y7a))])
199
200 # light
201 lines(x, y7b, col='orange', type='l')
202 lines(x, y6, col='orange3')
203 lines(x, y8, col='firebrick2')
204 lines(x, y9, col='firebrick4')
205
206 abline(v=x[which(y7b==max(y7b))])
207 abline(v=x[which(y6==max(y6))])
208 abline(v=x[which(y8==max(y8))])
209 abline(v=x[which(y9==max(y9))])
210
211 end_image()
212
213 whisker <- function (x0,y0,sd, length=0.05)
214 {
215     arrows(x0, y0, x0, y0+sd, code=2, angle=90, length=length)
216     arrows(x0, y0, x0, y0-sd, code=2, angle=90, length=length)
217 }
218
219 whisker2 <- function (x0,y0, y0_high, y0_low, col="black", length=0.05)
220 {
221     arrows(x0, y0, x0, y0_high, code=2, angle=90, length=length, col=col)
222     arrows(x0, y0, x0, y0_low, code=2, angle=90, length=length, col=col)
223 }
224
225 start_image("rt_aggregate_times.png")
226 par(mfrow=c(1,1))
227 par(mai=c(1,1,1,1))
228 par(mar=c(5,4,4,4))
229
230 s_list <- c(1519, 1596, 1112, 1591, 1019, 815)
231 m_list <- c(0,0,0,    119,  229,  251)
232 x_tick_list <- c(1,   2.5,  4, 5.5, 7, 8.5)
233 x_tt_resolve_list <- c(1,   2.5,  4, 5.2,5.8, 7, 8.5)
234 y_tt_resolve_list <- c( x[which(y4==max(y4))],
235                             x[which(y5==max(y5))],
236                             x[which(y6==max(y6))],
237                             x[which(y7a==max(y7a))],
238                             x[which(y7b==max(y7b))],
239                             x[which(y8==max(y8))],
240                             x[which(y9==max(y9))])
241
242
243 y_mean_list <- c( h4$cp['mean'],
244                 h5$cp['mean'],
245                 h6$cp['mean'],
246                 h7a$cp['mean'],
247                 h7b$cp['mean'],
248                 h8$cp['mean'],
249                 h9$cp['mean'])
250
251 y_sd_list <- c( h4$cp['s.d.'],
252                 h5$cp['s.d.'],
253                 h6$cp['s.d.'],
254                 h7a$cp['s.d.'],
255                 h7b$cp['s.d.'],
256                 h8$cp['s.d.'],
257                 h9$cp['s.d.'])
258
259 days_tt_resolve <- exp(y_tt_resolve_list)/24
260 days_tt_resolve_low <- exp(y_tt_resolve_list-y_sd_list)/24
261 days_tt_resolve_high <- exp(y_tt_resolve_list+y_sd_list)/24
262
263
264 my_mean_list <- c( mh_8$cp['mean'],
265                 mh_89$cp['mean'],
266                 mh_9$cp['mean'])
267
268 my_sd_list <- c( mh_8$cp['s.d.'],
269                 mh_89$cp['s.d.'],
270                 mh_9$cp['s.d.'])
271
272 mx_tt_resolve_list <- c(7, 8, 8.5)
273 my_tt_resolve_list <- c(x[which(my8==max(my8))],
274                         x[which(my89==max(my89))],
275                         x[which(my9==max(my9))] )
276
277 mdays_tt_resolve <- exp(my_tt_resolve_list)/24
278 mdays_tt_resolve_low <- exp(my_tt_resolve_list-my_sd_list)/24
279 mdays_tt_resolve_high <- exp(my_tt_resolve_list+my_sd_list)/24
280
281
282 days_y_sd_list <- exp(y_sd_list)/24
283 mdays_y_sd_list <- exp(my_sd_list)/24
284
285 days_y_sd_list <- exp(y_sd_list)/24
286 mdays_tt_resolve <- exp(my_tt_resolve_list)/24
287
288 plot(x_tt_resolve_list, days_tt_resolve, type='p', pch=c(22), axes=FALSE, 
289         log='y', ylim=c(.01,350), xlab="Year", ylab='')
290 #points(x_tt_resolve_list, days_tt_resolve, pch=c(22))
291
292 lines(c(x_tt_resolve_list[1:2], x_tt_resolve_list[4]), c(days_tt_resolve[1:2], days_tt_resolve[4]), col='red')
293 lines(c(x_tt_resolve_list[3], x_tt_resolve_list[5:7]), c(days_tt_resolve[3], days_tt_resolve[5:7]), col='green')
294 #lines(mx_tt_resolve_list, mdays_tt_resolve)
295 #points(mx_tt_resolve_list, mdays_tt_resolve, pch=c(24))
296
297 lines(mx_tt_resolve_list, mdays_tt_resolve, col='blue')
298 points(mx_tt_resolve_list, mdays_tt_resolve, pch=c(24))
299
300 ticks<-c(0,0.01, 0.1, 0.5,1,2,4,7,21, 28, 7*8, 7*16)
301
302 axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_tick_list)
303 axis(2, labels=ticks, at=ticks)
304 mtext("Days to Resolve Message", 2, line=3)
305 #axis(2, labels=ticks, at=ticks)
306 #for (i in 1:length(days_y_sd_list) ) {
307 #    whisker(x_tt_resolve_list[i], days_tt_resolve[i], days_y_sd_list[i])
308 #}
309 #for (i in 1:length(mdays_y_sd_list) ) {
310 #    whisker(mx_tt_resolve_list[i], mdays_tt_resolve[i], mdays_y_sd_list[i])
311 #}
312 for (i in c(1,2,4) ) {
313     whisker2(x_tt_resolve_list[i], days_tt_resolve[i], 
314             days_tt_resolve_high[i], days_tt_resolve_low[i], col='red')
315 }
316 for (i in c(3,5,6,7) ) {
317     whisker2(x_tt_resolve_list[i], days_tt_resolve[i], 
318             days_tt_resolve_high[i], days_tt_resolve_low[i], col='green')
319 }
320 for (i in 1:length(mdays_y_sd_list) ) {
321     whisker2(mx_tt_resolve_list[i], mdays_tt_resolve[i], 
322             mdays_tt_resolve_high[i], mdays_tt_resolve_low[i], col='blue')
323 }
324
325 abline(h=21,col='grey90')
326 abline(h=2,col='grey90')
327 abline(h=0.5,col='grey80')
328
329 legend(1, .05, 
330         cex=0.7,
331         legend=c("Unstable Periods", "Stable Periods", "MyOps Messages"), 
332         pch=c(22, 22, 24),
333         col=c('red', 'green', 'blue'),
334         lty=c(1, 1,1), merge=T)
335 end_image()
336 # install.packages('UsingR')
337 require(UsingR)
338
339 m<-min(t_4$start)
340 d<-data.frame(
341     '2004'=t_4$start-m,
342     '2005'=t_5$start-m,
343     '2006'=t_6$start-m)
344 simple.violinplot(d)
345
346 par(mfrow=c(3,3))
347 par(mai=c(.3,.3,.3,.3))
348 sp <- function (t)
349 {
350     d <- (t$lastreply-t$start)/(60*60*24)
351     simple.violinplot(log(d))
352 }
353 sp(t_4)
354 sp(t_5)
355 sp(t_6)
356 sp(t_7)
357 sp(t_8)
358 sp(t_9)
359 sp(m_8)
360 sp(m_89)
361 sp(m_9)
362
363
364 t3 <- add_year (t2)
365 m3 <- add_year (m2)
366
367 par(mfrow=c(1,2))
368 par(mai=c(.5,.5,.5,.5))
369 t4<-t3[which((t3$lastreply-t3$start)/(60*60*24) < 20),]
370 t4<-t3
371 simple.violinplot(log((lastreply-start)/(60*60*24)) ~ year, data=t4)
372
373 m3[which((m3$lastreply-m3$start)< 0),]
374 m4<-m3[which((m3$lastreply-m3$start)/(60*60*24) < 100),]
375 simple.violinplot(log((lastreply-start)/(60*60*24)) ~ year, data=m4, log='y')
376
377 meanof <- function (t, year)
378 {
379     tx <- t[which(t$year == year),]
380     r<-sn.em(y=log((tx$lastreply-tx$start)/(60*60*24)))
381     return (r)
382 }
383
384 t_sd <- p
385 t_p <- c( meanof(t3,2004)$cp['mean'],
386         meanof(t3,2005)$cp['mean'],
387         meanof(t3,2006)$cp['mean'],
388         meanof(t3,2007)$cp['mean'],
389         meanof(t3,2008)$cp['mean'],
390         meanof(t3,2009)$cp['mean'],
391         meanof(t3,2010)$cp['mean'])
392 points(t_p)
393 for (i in 1:length(t_sd) ) {
394     whisker(i, t_p[i], exp(t_sd[i]))
395 }
396
397
398
399
400
401
402 #for (i in 1:length(y_tt_resolve_list) ) { 
403 #    whisker(x_tt_resolve_list[i], scale_by*y_tt_resolve_list[i], scale_by*2) 
404 #}
405 #for (i in 1:length(my_tt_resolve_list) ) { 
406 #    whisker(mx_tt_resolve_list[i], scale_by*my_tt_resolve_list[i], scale_by*2) 
407 #}
408
409 #
410 #end_image()
411 #par(mfrow=c(2,1))
412 #plot_rt_hist(t_67)
413 #plot_rt_hist(t_89)
414 par(mfrow=c(1,1))
415