add new scripts
[monitor.git] / statistics / operator_overhead.r
1 source("functions.r");
2
3
# Count node-status records in state 'BOOT' for each consecutive interval
# between `from` and `to`.
#
# Arguments:
#   ns        data frame with a `date` column (compared numerically against
#             the interval bounds, which are POSIXct values with the class
#             removed) and a `status` column.
#   from, to  first and last date of the range; anything as.Date() accepts.
#   type      step handed to seq() (e.g. 'day', 'month').
#   fmt       format() string used to label the start of each interval.
#
# Returns a character matrix with one column per interval and three rows:
#   "xx"  lower bound of the interval,
#   "yy"  number of matching rows with lower < date <= upper,
#   "m"   label of the interval start formatted with `fmt`.
# When the range contains fewer than two dates there are no intervals and a
# 3 x 0 matrix is returned.
available_nodes <- function (ns, from, to, type, fmt="%b")
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), type)
    labels <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))
    n_bins <- length(hbreaks) - 1

    # A single date gives no interval; seq(1, 0) would iterate over c(1, 0).
    if (n_bins < 1) {
        return (matrix(character(0), nrow=3, ncol=0,
                       dimnames=list(c("xx", "yy", "m"), NULL)))
    }

    bins <- seq_len(n_bins)
    lower <- hbreaks[bins]
    upper <- hbreaks[bins + 1]

    # Rows whose comparison yields NA are not counted.
    yy <- vapply(bins, function (i) {
        sum(ns$date > lower[i] & ns$date <= upper[i] & ns$status == 'BOOT',
            na.rm=TRUE)
    }, integer(1))

    rbind(xx=lower, yy=yy, m=labels[bins])
}
27
28
29
# Count tickets that were open at some point during each consecutive
# interval between `from` and `to`.
#
# Arguments:
#   t         data frame with `start` and `lastreply` columns (compared
#             numerically against the interval bounds, which are POSIXct
#             values with the class removed).
#   from, to  first and last date of the range; anything as.Date() accepts.
#   type      step handed to seq() (e.g. 'day', 'month').
#   fmt       format() string used to label the start of each interval.
#
# Returns a character matrix with one column per interval and three rows:
#   "xx"  lower bound of the interval,
#   "yy"  number of tickets overlapping the interval,
#   "m"   label of the interval start formatted with `fmt`.
# When the range contains fewer than two dates there are no intervals and a
# 3 x 0 matrix is returned.
open_tickets <- function (t, from, to, type, fmt="%b")
{
    # find 'type' range of days
    dates <- seq(as.Date(from), as.Date(to), type)
    labels <- format(dates, fmt)
    hbreaks <- unclass(as.POSIXct(dates))
    n_bins <- length(hbreaks) - 1

    # A single date gives no interval; seq(1, 0) would iterate over c(1, 0).
    if (n_bins < 1) {
        return (matrix(character(0), nrow=3, ncol=0,
                       dimnames=list(c("xx", "yy", "m"), NULL)))
    }

    bins <- seq_len(n_bins)
    lower <- hbreaks[bins]
    upper <- hbreaks[bins + 1]

    yy <- vapply(bins, function (i) {
        lo <- lower[i]
        hi <- upper[i]
        # A ticket counts when it starts in the interval, receives its last
        # reply in the interval, or covers the interval entirely. The
        # covering test uses <= on the lower bound so that a ticket starting
        # exactly on a break is still counted in the interval that follows.
        covers     <- t$start <= lo & t$lastreply > hi
        starts_in  <- t$start > lo & t$start <= hi
        replied_in <- t$lastreply > lo & t$lastreply <= hi
        sum(covers | starts_in | replied_in, na.rm=TRUE)
    }, integer(1))

    rbind(xx=lower, yy=yy, m=labels[bins])
}
63
# For every distinct timestamp in `fb`, count the distinct hostnames seen
# and how many of them are in state 'BOOT'.
#
# Arguments:
#   fb  data frame with `timestamp`, `hostname` and `state` columns.
#
# Returns a matrix with one column per distinct timestamp (in order of first
# appearance) and three rows:
#   "x"  the timestamp,
#   "n"  number of distinct hostnames at that timestamp,
#   "o"  number of distinct hostnames at that timestamp with state 'BOOT'.
# An input without any timestamps yields a 3 x 0 matrix.
#
# The lengths of the three result vectors are printed before returning;
# this diagnostic output is kept from the original implementation.
online_nodes <- function (fb)
{
    breaks <- unique(fb$timestamp)

    # One subset of rows per distinct timestamp.
    per_ts <- lapply(seq_along(breaks), function (i) {
        fb[which(fb$timestamp == breaks[i]),]
    })

    n <- vapply(per_ts, function (sub) {
        length(unique(sub$hostname))
    }, integer(1))
    o <- vapply(per_ts, function (sub) {
        length(unique(sub$hostname[which(sub$state=='BOOT')]))
    }, integer(1))

    print(length(breaks))
    print(length(n))
    print(length(o))

    # seq(1, 0) used to iterate over c(1, 0) here and return bogus columns.
    if (length(breaks) == 0) {
        return (matrix(numeric(0), nrow=3, ncol=0,
                       dimnames=list(c("x", "n", "o"), NULL)))
    }

    rbind(x=breaks, n=n, o=o)
}
85
# Smooth the series (x, y) with lowess(), using this script's defaults.
#
# Arguments:
#   x, y   coordinates handed straight to lowess().
#   delta  lowess() `delta`; defaults to 60*60*24 (86400, i.e. one day when
#          x is measured in seconds).
#   f      lowess() smoother span; defaults to 0.02.
#
# Returns whatever lowess() returns: a list with components `x` and `y`.
lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
{
    lowess(x, y, f=f, delta=delta)
}
91
92 #####
93
# Two-panel figure written to rt_operator_overhead.png:
#   top    - open tickets per day (raw series plus a lowess smooth),
#   bottom - ticket resolution time, three curves taken from
#            median_time_to_resolve_window().
# Both panels carry the same dashed vertical markers and labels.

# system("parse_rt_data.py 3 > rt_data.csv");
t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
t2 <- t[which(t$complete == 1),]
ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b")

# Dates at which a dashed vertical line is drawn. The last one is drawn in
# white: it is only needed as the right-hand edge of the final label span.
marker_dates <- c("2004-11-12", "2005-03-01", "2005-06-15", "2005-10-23",
                  "2007-01-01", "2007-05-01", "2008-06-01", "2008-08-15",
                  "2009-05-01", "2010-02-01")
marker_cols  <- c(rep('grey60', 9), 'white')

# Each label is centred between a pair of markers.
# NOTE(review): the labels look like software version numbers - confirm.
span_from   <- c("2004-11-12", "2005-03-01", "2005-06-15", "2005-10-23",
                 "2007-01-01", "2008-06-01", "2009-05-01")
span_to     <- c("2005-03-01", "2005-06-15", "2005-10-23", "2007-01-01",
                 "2007-05-01", "2008-08-15", "2010-02-01")
span_labels <- c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')

# Draw every marker on the current panel and write the span labels at
# height `label_y`. Relies on abline_at_date() returning the x position of
# the line it draws, as the midpoint arithmetic below requires.
draw_version_markers <- function (label_y)
{
    stamps <- list()
    for (k in seq_along(marker_dates)) {
        stamps[[marker_dates[k]]] <- abline_at_date(marker_dates[k],
                                                    col=marker_cols[k], lty=2)
    }

    mids <- lapply(seq_along(span_labels), function (k) {
        lo <- stamps[[span_from[k]]]
        hi <- stamps[[span_to[k]]]
        lo + (hi - lo)/2
    })

    text(x=do.call(c, mids), y=c(label_y), labels=span_labels)
}

start_image("rt_operator_overhead.png")
par(mfrow=c(2,1))

# --- top panel: open tickets per day ---
# No bottom margin, so this panel sits directly on top of the next one.
par(mai=c(0,1,0.1,0.1))

x1 <- as.numeric(ot[1,])
y1 <- as.numeric(ot[2,])

a_ot <- lowess_smooth(x1, y1)

plot(x1, y1, col='grey80', type='l', axes=FALSE,
    ylab="Open Tickets (tickets/day)", xlab="Date",
    ylim=c(0,120))
lines(a_ot$x, round(a_ot$y), col='black')

axis(2, las=1)

abline(h=25, lty=2, col='grey80')
abline(h=40, lty=2, col='grey80')

draw_version_markers(120)

# --- bottom panel: resolution time ---
par(mai=c(1,1,0.1,0.1))

s <- 7
d <- median_time_to_resolve_window(t2, "2004/1/1", "2010/2/28", s, "%b")

# NOTE(review): columns 3-5 are presumably log-hour quartiles (they are
# exponentiated and divided by 24 to plot days) - confirm against
# median_time_to_resolve_window() in functions.r.
plot(d[,1], exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
        axes=FALSE, ylim=c(0.01, 15), ylab="Resolution Time by", col='grey50',
        xlim=c(min(x1), max(x1)))
mtext("Quartile (days)", 2, 2)
lines(d[,1], exp(as.numeric(d[,4]))/24, lty=1, col='black')
lines(d[,1], exp(as.numeric(d[,3]))/24, lty=1, col='grey50')

# The x axis reuses the tick positions and labels of the open-ticket series
# so that both panels line up.
axis(1, labels=ot[3,], at=ot[1,], cex.axis=0.7)
mtext("2004           2005           2006           2007           2008           2009", 1,2)
axis(2, las=1)

# Mark the maximum of the column-4 curve with its own tick and a dashed line.
m <- round(max(exp(as.numeric(d[,4]))/24), 2)
axis(2, labels=m, at=m, las=1)
abline(h=m, lty=2, col='grey40')

draw_version_markers(15)

end_image()