clearer names for actions, and infer actions better
[monitor.git] / statistics / rt_s3_overhead.r
1 source("functions.r");
2
3
4
# Rolling quantiles of log ticket-resolution time.
#
# Slides a window covering `window` consecutive steps of tg$week_ts along the
# timeline.  For each position it selects the tickets whose `start` falls in
# (hbreaks[i], hbreaks[i + window]] and summarises log((lastreply - start) /
# 3600), i.e. the log of the resolution time in hours.
#
# Args:
#   t:      data frame of tickets with numeric columns `start` and
#           `lastreply` (differences are divided by 60*60 to get hours).
#   tg:     object whose `week_ts` element holds the window boundaries, on
#           the same scale as t$start.
#   window: number of boundary steps spanned by each window.
#
# Returns:
#   A matrix with one row per window that contains more than one ticket.
#   Column 1 (`xx`) is the boundary at index floor(i + window / 2); columns
#   2-6 are the 0%, 25%, 50%, 75% and 100% quantiles of log(hours).
#   NULL when no window qualifies.
median_time_to_resolve_window <- function (t, tg, window)
{
    hbreaks <- tg$week_ts

    # seq_len() gives an empty sequence when there are too few boundaries;
    # seq(1, n) would instead count backwards through 1, 0, -1, ...
    n_rounds <- length(hbreaks) - window - 1
    rounds <- seq_len(max(n_rounds, 0))

    # Preallocate one slot per round; skipped rounds stay NULL and are
    # dropped when the pieces are combined below.
    centres <- vector("list", length(rounds))
    quantiles <- vector("list", length(rounds))

    for ( i in rounds )
    {
        print (sprintf("round %s of %s", i, n_rounds))

        # tickets opened inside the current window
        in_window <- which(t$start > hbreaks[i] & t$start <= hbreaks[i + window])
        t_sub <- t[in_window, ]
        if ( length(t_sub$start) <= 1 )  { next }

        d <- (t_sub$lastreply - t_sub$start) / (60 * 60)    # hours
        d <- log(d)                                         # log(hours)
        print (sprintf("length: %s", length(d)))
        q <- quantile(d)
        print(q)

        # floor() rather than round(): round() sends halves to the even
        # neighbour, so an odd window made consecutive rounds share the same
        # centre index (e.g. 3.5 -> 4 and 4.5 -> 4), duplicating x values.
        centres[[i]] <- hbreaks[floor(i + window / 2)]
        quantiles[[i]] <- q
    }

    xx <- unlist(centres)
    q_list <- do.call(rbind, quantiles)
    return (cbind(xx, q_list))
}
40
# Count 'BOOT' status records per calendar interval.
#
# Builds a sequence of dates from `from` to `to` in steps of `type` (passed to
# seq() as the `by` argument, e.g. "month"), converts them to numeric POSIXct
# values and counts the rows of `ns` whose `date` lies in each half-open
# interval (break[i], break[i + 1]] and whose `status` equals 'BOOT'.
#
# Args:
#   ns:   data frame with columns `date` (compared against the numeric break
#         values) and `status`.
#   from: first date, anything as.Date() accepts.
#   to:   last date, anything as.Date() accepts.
#   type: step between breaks, as understood by seq() on Date objects.
#   fmt:  format() string used to label each interval (default "%b").
#
# Returns:
#   A character matrix (the labels force character mode) with rows `xx`
#   (interval start), `yy` (record count) and `m` (label), one column per
#   interval.  There are no columns when fewer than two breaks exist.
available_nodes <- function (ns, from, to, type, fmt="%b")
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))

    # One bin per consecutive pair of breaks.  seq_len() stays empty for a
    # single break, where seq(1, 0) would iterate over 1 and 0.
    bins <- seq_len(max(length(hbreaks) - 1, 0))

    xx <- hbreaks[bins]
    yy <- vapply(bins, function (i) {
        in_bin <- ns$date > hbreaks[i] & ns$date <= hbreaks[i + 1] &
                  ns$status == 'BOOT'
        # na.rm mirrors which(), which silently drops NA comparisons
        sum(in_bin, na.rm = TRUE)
    }, integer(1))

    # Every label except the last one.  The previous months[1:length(months)-1]
    # parsed as (1:n) - 1 and only worked because index 0 is ignored.
    m <- months[bins]
    return (rbind(xx, yy, m))
}
64
65
# Count the tickets that are open at some point during each interval.
#
# Intervals are the half-open ranges (day_ts[i], day_ts[i + 1]] formed by
# consecutive entries of tg$day_ts.  A ticket counts for an interval when
#   * it starts inside the interval, or
#   * its last reply falls inside the interval, or
#   * it starts at or before the interval and its last reply comes after it.
#
# Args:
#   t:  data frame of tickets with columns `start` and `lastreply`, on the
#       same scale as tg$day_ts.
#   tg: object whose `day_ts` element holds the interval boundaries.
#
# Returns:
#   A two-row matrix: row `xx` is the lower boundary of each interval, row
#   `yy` the number of matching tickets.  There are no columns when fewer
#   than two boundaries exist.
open_tickets <- function (t, tg)
{
    hbreaks <- tg$day_ts

    # seq_len() stays empty for fewer than two boundaries; seq(1, 0) would
    # iterate over 1 and 0.
    bins <- seq_len(max(length(hbreaks) - 1, 0))

    xx <- hbreaks[bins]
    yy <- vapply(bins, function (i) {
        lo <- hbreaks[i]
        hi <- hbreaks[i + 1]

        # `<=` on the lower bound: a ticket starting exactly on the boundary
        # and running past the interval belongs to it, but was previously
        # missed by all three tests (strict `<` here, strict `>` in `opened`).
        spans  <- t$start <= lo & t$lastreply > hi
        opened <- t$start > lo & t$start <= hi
        closed <- t$lastreply > lo & t$lastreply <= hi

        # na.rm mirrors which(), which silently drops NA comparisons
        sum(spans | opened | closed, na.rm = TRUE)
    }, integer(1))

    return (rbind(xx, yy))
}
87
# Per-timestamp node totals and number of nodes in state 'BOOT'.
#
# Args:
#   fb: data frame with columns `timestamp`, `hostname` and `state`.
#
# Returns:
#   A three-row matrix with one column per distinct timestamp (in order of
#   first appearance): row `x` is the timestamp, row `n` the number of
#   distinct hostnames seen at that timestamp, row `o` the number of distinct
#   hostnames whose state is 'BOOT'.  There are no columns for empty input.
#
# The three vector lengths are printed before returning.
online_nodes <- function (fb)
{
    x <- unique(fb$timestamp)

    # seq_along() is empty for empty input; seq(1, 0) would iterate over 1
    # and 0 and yield vectors of mismatched length.
    per_ts <- lapply(seq_along(x), function (i) {
        fb[which(fb$timestamp == x[i]), ]
    })

    n <- vapply(per_ts, function (sub) {
        length(unique(sub$hostname))
    }, integer(1))
    o <- vapply(per_ts, function (sub) {
        length(unique(sub$hostname[which(sub$state == 'BOOT')]))
    }, integer(1))

    print(length(x))
    print(length(n))
    print(length(o))
    return (rbind(x, n, o))
}
109
110 #####
111
112 # system("rt_s1_raw_dump.py --runsql");
113 # system("rt_s2_parse_raw.py 3 > rt_data.csv");
114 # t <- read.csv('rt_data_2004-2011.csv', sep=',', header=TRUE)
115 #t <- read.csv(, sep=',', header=TRUE)
116
# Plot open-ticket counts and, optionally, resolution-time quartiles.
#
# Reads the ticket CSV, keeps the rows with complete == 1 and draws:
#   a) open tickets per day (raw in grey, smoothed in black);
#   b) unless `one` is TRUE, a second panel with the 25%/50%/75% resolution
#      time (in days) over a sliding window of 5 weekly steps.
#
# Args:
#   input_filename:  CSV with a header row; must provide the columns
#                    `complete`, `start` and `lastreply`.
#   output_filename: passed to start_image() when `draw` is TRUE.
#   start_date:      passed to time_graph_setup().
#   end_date:        passed to time_graph_setup().
#   draw:            when TRUE, wrap the plot in start_image()/end_image().
#   one:             when TRUE, draw only panel a) with its own date axis.
#
# Relies on helpers defined outside this block: time_graph_setup(),
# start_image(), end_image(), lowess_smooth(), plc_releases(),
# planetlab_releases(), plus open_tickets() and
# median_time_to_resolve_window().
# NOTE(review): panel a) calls plc_releases() while panel b) calls
# planetlab_releases() -- confirm both names exist in functions.r.
draw_rt_data <- function (input_filename, output_filename, start_date, end_date, draw=TRUE, one=FALSE)
{
    t <- read.csv(input_filename, sep=',', header=TRUE)
    t2 <- t[which(t$complete == 1),]

    tg <- time_graph_setup(start_date, end_date)
    ot <- open_tickets(t2, tg)

    if ( draw == TRUE ) {
        start_image(output_filename, width=600, height=400)
        # Registered immediately so the image is also finished when one of
        # the plotting calls below fails; on success it runs at function
        # exit, exactly where end_image() used to be called.
        on.exit(end_image(), add=TRUE)
    }
    if ( one == TRUE )
    {
        par(mfrow=c(1,1))
        par(mai=c(0.8,1,0.4,0.1))
    } else {
        par(mfrow=c(2,1))
        par(mai=c(0,1,0.3,0.1))
    }

    x1 <- as.numeric(ot[1,])
    y1 <- as.numeric(ot[2,])

    a_ot <- lowess_smooth(x1, y1)

    # panel a): raw daily counts with the smoothed curve on top
    plot(x1, y1, col='grey80', type='l', axes=FALSE,
        ylab="a) Open Tickets (tickets/day)", xlab="Date",
        ylim=c(0,120))
    lines(a_ot$x, round(a_ot$y), col='black')

    axis(2, las=1)
    if ( one == TRUE ) {
        # single-panel layout: panel a) carries the date axis itself
        axis(1, labels=tg$month_str, at=tg$month_ts, cex.axis=0.7)
        axis(1, labels=tg$year_str, at=tg$year_ts, cex.axis=0.7, line=1, lwd=0)
    }

    abline(h=15, lty=3, col='grey80')
    abline(h=25, lty=3, col='grey80')
    abline(h=40, lty=3, col='grey80')

    plc_releases(120)
    if ( one == FALSE )
    {
        par(mai=c(1,1,0.1,0.1))

        # Columns of d: 1 = x position, 3/4/5 = 25%/50%/75% quantile of
        # log(hours); exp(.)/24 converts back to days.
        d <- median_time_to_resolve_window(t2, tg, 5)
        plot(d[,1], exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
                axes=FALSE, ylim=c(0.01, 15), ylab="b) Resolution Time by", col='black',
                xlim=c(min(x1), max(x1)))
        mtext("Quartile (days)", 2, 2)
        lines(d[,1], exp(as.numeric(d[,4]))/24, lty=1, col='grey50')
        lines(d[,1], exp(as.numeric(d[,3]))/24, lty=1, col='grey75')
        axis(1, labels=tg$month_str, at=tg$month_ts, cex.axis=0.7)
        axis(1, labels=tg$year_str, at=tg$year_ts, cex.axis=0.7, line=1, lwd=0)
        axis(2, labels=c(0,1,4,7,14), at=c(0,1,4,7,14), las=1)

        abline(h=1, lty=3, col='grey80')
        abline(h=4, lty=3, col='grey80')
        abline(h=7, lty=3, col='grey80')

        planetlab_releases(15)
    }
}
188
# Optional: regenerate the input CSV before plotting.
#system("./rt_s2_parse_raw.py 3 > rt_data_2004-2011.csv");

# Operator-support tickets, 2004 through mid-2011, as a single-panel image.
draw_rt_data(
    "rt_data_2004-2011.csv",
    "rt_operator_support_2004-2011.png",
    "2004/1/1",
    "2011/6/1",
    draw=TRUE,
    one=TRUE
)

# Alternative runs, kept for reference.
#draw_rt_data('rt_data_monitor_2004-2011.csv',"rt_operator_monitor_2004-2011.png", "2004/1/1", "2011/4/1")

#draw_rt_data('short_support_20110101.csv',"rt_short_2011.png", "2010/11/1", "2011/4/1", FALSE)