clearer names for actions, and infer actions better
[monitor.git] / statistics / node_history_ttr.r
1 source("functions.r");
2
# Node status history exports.  Every file is comma-separated with a header
# row.

# Full history, plus the export written by harvest_nodehistory.py.
# To regenerate the second file:
#   system("./harvest_nodehistory.py > node_status_history_nopcu.csv")
nsh       <- read.csv("node_status_history.csv",       header = TRUE, sep = ",")
nsh_nopcu <- read.csv("node_status_history_nopcu.csv", header = TRUE, sep = ",")

# M1 exports.  The file read into nsh_m1_nopcu_notice is the one the M1
# harvest script writes:
#   system("stats-m1/harvest_nodehistory_m1.py > ./node_status_history_m1_nopcu.csv")
nsh_m1              <- read.csv("node_status_history_m1.csv",
                                header = TRUE, sep = ",")
nsh_m1_nopcu_total  <- read.csv("node_status_history_m1_nopcu_total.csv",
                                header = TRUE, sep = ",")
nsh_m1_nopcu_notice <- read.csv("node_status_history_m1_nopcu.csv",
                                header = TRUE, sep = ",")
nsh_m1_nopcu_kernel <- read.csv("node_status_history_m1_nopcu_may08sep08.csv",
                                header = TRUE, sep = ",")
13
# Build a host-by-time matrix marking when each host was down, draw it with
# myImagePlot(), and return the matrix.
#
# Args:
#   t:      data frame of status events.  The columns read here are
#           `hostname`, `start` and `status`.
#           NOTE(review): `start` is compared against
#           unclass(as.POSIXct(dates)), so it is presumably epoch seconds
#           -- confirm.
#           NOTE(review): as.numeric(t$hostname) sizes the matrix and the
#           hostname value is used as the row index, which presumably relies
#           on `hostname` being a factor (or a numeric id) -- confirm.
#           NOTE(review): the row following a 'down' row of the same host is
#           taken to be its recovery, so rows are presumably in time order
#           -- confirm.
#   year:   unused; kept so existing calls keep working.
#   from:   first date of the range (passed to as.Date()).
#   to:     last date of the range (passed to as.Date()).
#   max:    unused; kept so existing calls keep working.
#   type:   step handed to seq() when building the date breaks.
#   title:  title forwarded to myImagePlot().
#
# Returns:
#   Numeric matrix with max(as.numeric(t$hostname)) rows and one column per
#   date break; cells are 1 where the host is marked down, 0 otherwise.
node_hist_image <- function (t, year, from, to, max=0, type="week", title="")
{
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, "%b-%d")
    hbreaks <- unclass(as.POSIXct(dates))
    n_breaks <- length(hbreaks)
    day_secs <- 60*60*24

    image <- matrix(data=0, nrow=max(as.numeric(t$hostname)), ncol=n_breaks)

    for ( i in seq_along(hbreaks) )
    {
        # the range examined for this column: d plus a day
        d <- hbreaks[i]
        d_end <- d + day_secs

        # hosts with a 'down' event starting inside this range
        t_sub <- t[which(t$start > d & t$start <= d_end & t$status == 'down'),]
        unique_hosts <- unique(t_sub$hostname)

        for ( hi in seq_along(unique_hosts) )
        {
            host_n <- unique_hosts[hi]
            host_s <- as.character(host_n)

            # events for this host after d (to avoid already identified events)
            ev <- t[which(t$hostname == host_s & t$start > d ),]
            n_ev <- nrow(ev)
            print (sprintf("events length for host %s %s", host_s, length(ev$start)));

            # every 'down' event of this host after d is marked starting at
            # column i
            for ( e_i in which(ev$status == 'down') )
            {
                if ( e_i == n_ev ) {
                    # the node ends down, so fill in the rest of the row
                    last_col <- n_breaks
                } else {
                    # the next row is treated as the recovery event
                    good_ev <- ev[e_i+1,]
                    dbreaks <- seq(d, good_ev$start + day_secs, day_secs)
                    l <- length(dbreaks)
                    print (sprintf("length %s",l));
                    # Clamp to the matrix width: i + l can point past the last
                    # column, which used to abort with "subscript out of
                    # bounds".
                    # NOTE(review): the span i..i+l is counted in daily steps
                    # and runs past the column holding the recovery event;
                    # confirm whether that overshoot is intended.
                    last_col <- min(i + l, n_breaks)
                }
                image[host_n, i:last_col] <- 1
            }
        }
    }
    myImagePlot(image, xLabels=months, yLabels=c(""), title=title)
    return (image);
}
70
71
72
# Collect the durations of down periods from a table of node status events.
#
# For every host, each 'down' row that is followed by another row of the same
# host contributes (start of the following row) - (start of the down row).
# A host whose only row is a 'down' row is counted as never resolved and
# contributes one very large value instead.  A trailing 'down' row of a host
# with more than one row contributes nothing.
#
# Args:
#   t:      data frame of status events.  The columns read here are
#           `hostname`, `start` and `status`.
#           NOTE(review): the row after a 'down' row is taken to be its
#           recovery, so rows are presumably in time order per host
#           -- confirm.
#   year, from, to, max, type, title:
#           unused; kept so existing calls keep working.
#
# Returns:
#   Numeric vector of durations in the units of `start`, in host order, or
#   NULL when no duration was collected.
#
# Side effects: prints one line per host, one line per never-resolved host,
# and finally the number of never-resolved hosts.
node_hist_dist <- function (t, year, from, to, max=0, type="week", title="")
{
    current_ts <- as.numeric(as.POSIXct(Sys.Date()))

    host_s_list <- as.character(unique(t$hostname))

    down <- 0
    # one slot per host, combined once at the end instead of growing a
    # vector inside the loop
    per_host <- vector("list", length(host_s_list))

    for ( hi in seq_along(host_s_list) )
    {
        host_s <- host_s_list[hi]
        ev <- t[which( t$hostname == host_s ),]
        n_ev <- nrow(ev)
        print (sprintf("events length for host %s %s", host_s, length(ev$start)));

        # down events for this host; those that are not the last row have a
        # following event to measure against
        down_idx <- which(ev$status == 'down')
        resolved <- down_idx[down_idx != n_ev]

        if ( length(resolved) > 0 ) {
            per_host[[hi]] <- ev$start[resolved + 1] - ev$start[resolved]
        } else if ( n_ev == 1 && length(down_idx) == 1 ) {
            # single 'down' row and nothing else: no resolution was seen
            print (sprintf("DOWN FOREVER! %s", length(ev$start) ))
            down <- down + 1
            # NOTE(review): 10 * current timestamp looks like a deliberate
            # "effectively infinite" placeholder -- confirm.
            per_host[[hi]] <- 10*current_ts - ev$start
        }
    }
    print(down);
    return (unlist(per_host));
}
114
115 # Image Stuff
116 #nsh_image <- node_hist_image(nsh, '2009', '2009-06-01', '2010-02-28', 0, 'day')
117 #nsh_image_m1 <- node_hist_image(nsh_m1, '2009', '2008-10-01', '2009-03-28', 0, 'day')
118
119 #nsh_short <- nsh[which(nsh$start > unclass(as.POSIXct("2009-06-01", origin="1970-01-01"))[1]),]
120 #nsh_short <- nsh_short[which(nsh_short$start < unclass(as.POSIXct("2009-10-31", origin="1970-01-01"))[1]),]
121
122 #
123 #nsh_short <- nsh_nopcu
124 #nsh_dist <- node_hist_dist(nsh_short, '2009', '2009-06-01', '2010-02-28', 0, 'day')
125 #d<- ecdf(nsh_dist/(60*60*24))
126
127 #nsh_m1_short <- nsh_m1[which(nsh_m1$start > unclass(as.POSIXct("2008-10-01", origin="1970-01-01"))[1]),]
128
# M1 --
# Time-to-resolve distributions for three windows of the M1 history.
#
# Window boundaries as numeric timestamps, comparable with the `start`
# column of the history tables.  Listed once, in chronological order.
t_0530 <- unclass(as.POSIXct("2008-05-30", origin = "1970-01-01"))[1]
t_0815 <- unclass(as.POSIXct("2008-08-15", origin = "1970-01-01"))[1]
t_0905 <- unclass(as.POSIXct("2008-09-05", origin = "1970-01-01"))[1]
t_0924 <- unclass(as.POSIXct("2008-09-24", origin = "1970-01-01"))[1]
t_1015 <- unclass(as.POSIXct("2008-10-15", origin = "1970-01-01"))[1]
t_0223 <- unclass(as.POSIXct("2009-02-23", origin = "1970-01-01"))[1]
t_0224 <- unclass(as.POSIXct("2009-02-24", origin = "1970-01-01"))[1]
t_0313 <- unclass(as.POSIXct("2009-03-13", origin = "1970-01-01"))[1]

# TOTAL: events starting after 2008-08-15 and up to 2009-03-13.
#
# Narrower selections tried earlier (kept for reference):
#nsh_m1_short <- nsh_m1_nopcu_total[which(
#        (nsh_m1_nopcu_total$start > t_0815 & nsh_m1_nopcu_total$start <= t_0905) |
#        (nsh_m1_nopcu_total$start > t_0924 & nsh_m1_nopcu_total$start <= t_1015) |
#        (nsh_m1_nopcu_total$start > t_0223 & nsh_m1_nopcu_total$start <= t_0313)  ),]
#
#nsh_m1_short <- nsh_m1_nopcu_total[which(
#        (nsh_m1_nopcu_total$start > t_0815 & nsh_m1_nopcu_total$start <= t_0905) |
#        (nsh_m1_nopcu_total$start > t_0924 & nsh_m1_nopcu_total$start <= t_1015) ),]
nsh_m1_short <- nsh_m1_nopcu_total[
    which(nsh_m1_nopcu_total$start > t_0815 &
          nsh_m1_nopcu_total$start <= t_0313), ]
nsh_dist_m1 <- node_hist_dist(nsh_m1_short, "2008", "2008-05-01", "2009-05-22",
                              0, "day")
# durations are converted to days before building the ECDF
d_m1_total <- ecdf(nsh_dist_m1 / (24 * 60 * 60))


# NOTICE BUG: events starting after 2008-10-15 and up to 2009-02-24.
# NOTE: something happened between 10-2 and 10-3
nsh_m1_short <- nsh_m1_nopcu_notice[
    which(nsh_m1_nopcu_notice$start > t_1015 &
          nsh_m1_nopcu_notice$start <= t_0224), ]
nsh_dist_m1 <- node_hist_dist(nsh_m1_short, "2008", "2008-10-01", "2009-03-22",
                              0, "day")
d_m1_notice_bug <- ecdf(nsh_dist_m1 / (24 * 60 * 60))


# KERNEL BUG: events starting after 2008-05-30 and up to 2008-08-15.
nsh_m1_short <- nsh_m1_nopcu_kernel[
    which(nsh_m1_nopcu_kernel$start > t_0530 &
          nsh_m1_nopcu_kernel$start <= t_0815), ]
nsh_dist_m1 <- node_hist_dist(nsh_m1_short, "2008", "2008-05-10", "2008-08-15",
                              0, "day")
d_m1_kernel_bug <- ecdf(nsh_dist_m1 / (24 * 60 * 60))
170
171
# Plot the three M1 time-to-resolve ECDFs on one set of axes.
#
# Earlier variant, restricted to durations under 90 days:
# d<-ecdf(nsh_dist[which(nsh_dist/(60*60*24) < 90 )]/(60*60*24)),
# The x range of 180 days is roughly 6 months.
par(mfrow=c(1,1))
par(mai=c(.9,.9,.1,.1))
# NOTE(review): start_image() is commented out here while end_image() is
# still called at the bottom -- confirm the pairing is intended.
#start_image("node_history_ttr_nopcu.png")
#plot(d, xlim=c(0,180), ylim=c(0,1), axes=F, xlab="Days to Resolve", ylab="Percentile",
#   col.hor='red', col.vert='red', pch='.', col.points='red', main="")

# First curve sets up the plot region; axes are suppressed and drawn by hand
# further down.
plot(d_m1_total, xlim=c(0,180), ylim=c(0,1), axes=FALSE, xlab="Days to Resolve", 
    ylab="Percentile", col.hor='red', col.vert='red', pch='.', 
    col.points='red', main="")

# Remaining curves are overlaid on the existing plot.
plot(d_m1_notice_bug, xlim=c(0,180), ylim=c(0,1), xlab="Days to Resolve", 
    ylab="Percentile", col.hor='blue', col.vert='blue', pch='.', 
    col.points='blue', add=TRUE)

plot(d_m1_kernel_bug, xlim=c(0,180), ylim=c(0,1), xlab="Days to Resolve", 
    ylab="Percentile", col.hor='green', col.vert='green', pch='.', 
    col.points='green', add=TRUE)

# x tick positions, in days
weeks <- c(0,7,14,21,28,60,90,120,150,180)
axis(1, labels=weeks, at=weeks)
percentages <- c(0,0.25, 0.5, 0.75, 0.85, 0.95, 1)
axis(2, las=1, labels=percentages, at=percentages)

# light reference grid: the first four weeks, selected percentiles, and day 91
abline(v=c(7,14,21,28), col='grey80', lty=2)
abline(h=c(0.5, 0.6, 0.75, 0.85, 0.95 ), col='grey80', lty=2)
abline(v=c(91), col='grey80', lty=2)


legend(100, 0.1,
       cex=0.7,
       legend=c("Typical MyOps -- fix1 through fix4", "Notice Bug", "Kernel Bug"),
       pch=c('-', '-', '-'),
       col=c('red', 'blue', 'green'),
       lty=c(1, 1, 1), merge=TRUE)

end_image()