# ===========================================================================
# monitor.git commit 5a40e4f30c45995dcbd06c5f9a7b5f997c4c926c
# From: Stephen Soltesz, Thu 11 Feb 2010
# Subject: R routines for printing some statistics
#
# NOTE(review): recovered from a whitespace-mangled git patch.  This span
# reconstructs statistics/bm_reboot.r, statistics/bm_reboot_api.r and the
# first part of statistics/bm_reboot_unique.r as clean R.
# ===========================================================================

# ---------------------------------------------------------------------------
# statistics/bm_reboot.r
#
# Plot daily reboot rates and unique daily reboots from BootManager upload
# logs, then check how closely per-day reboot counts follow a normal
# distribution (QQ plot + histogram with scaled normal overlay).
# ---------------------------------------------------------------------------

source("functions.r")

# Input CSVs are produced from raw event dumps by (run outside R):
#   parse_rt_data.py > rt_data.csv
#   ./bmevents.py events.1-18-10       BootUpdateNode > bm_reboot_2010-01-18.csv
#   ./bmevents.py events.10-08-09      BootUpdateNode > bm_reboot_2009-10-08.csv
#   ./bmevents.py events.29.12.08.dump BootUpdateNode > bm_reboot_2008-12-29.csv
#   ./bmevents.py events.8-25-09.dump  BootUpdateNode > bm_reboot_2009-08-25.csv
t <- read.csv('bm_reboot.csv', sep=',', header=TRUE)

t2 <- t

# Year boundaries as epoch seconds.  FIX: the original passed
# origin="1960-01-01"; `origin` is ignored by as.POSIXct() for character
# input (it only applies to numeric input), so behavior is unchanged, but
# the conventional Unix epoch is used here to avoid misleading readers.
tstamp_78 <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))[1]
tstamp_89 <- unclass(as.POSIXct("2009-01-01", origin="1970-01-01"))[1]

# Split events by year: before 2008, during 2008, 2009 onward.
t_7 <- t2[which( t2$start <  tstamp_78 ),]
t_8 <- t2[which( t2$start >= tstamp_78 & t2$start < tstamp_89 ),]
t_9 <- t2[which( t2$start >= tstamp_89 ),]

tstamp <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))
t_67 <- t2[which( t2$start <  tstamp[1] ),]
t_89 <- t2[which( t2$start >= tstamp[1] ),]

#start_image("bm_reboot.png")

par(mfrow=c(2,1))
par(mai=c(.5,.4,.5,.4))
year_hist(t_9, "2009", "2009/06/21", "2010/2/10", 500, 'day', "Daily Reboot Rates")
rows <- year_hist_unique(t_9, "2009", "2009/06/21", "2010/2/10", 100, 'day', "Unique Daily Reboots")

#end_image()

start_image("reboot_distributions.png")
par(mfrow=c(2,1))
par(mai=c(.5,.5,.5,.5))

# Restrict to "typical" days (0 < reboots < 50) so outliers do not dominate
# the normality check.  (Hoisted: the original recomputed this subset six
# times.)
typical <- rows$reboots[which(rows$reboots>0 & rows$reboots<50)]
m <- mean(typical)
s <- sd(typical)

qqnorm(typical)
qqline(typical)

h <- hist(typical, breaks=20)
# Overlay a normal density scaled to the histogram's count axis.
x <- 0:100/100 * 2 * m
y <- dnorm(x, mean=m, sd=s)
lines(x, y*max(h$counts)/max(y))
end_image()

par(mfrow=c(1,1))
par(mai=c(.7,.7,.7,.7))

# ---------------------------------------------------------------------------
# statistics/bm_reboot_api.r
#
# Same analysis as bm_reboot.r but driven from API events
# (bm_reboot_2008-12-29.csv), plus a comparison of API-event counts against
# raw-log counts.
#
# NOTE(review): the comparison section reads `rows`, which is produced by
# bm_reboot.r (year_hist_unique on the raw-log data) -- run that script
# first in the same session.
# ---------------------------------------------------------------------------

source("functions.r")

t <- read.csv('bm_reboot_2008-12-29.csv', sep=',', header=TRUE)

t2 <- t

tstamp_78 <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))[1]
tstamp_89 <- unclass(as.POSIXct("2009-01-01", origin="1970-01-01"))[1]

t_7 <- t2[which( t2$start <  tstamp_78 ),]
t_8 <- t2[which( t2$start >= tstamp_78 & t2$start < tstamp_89 ),]
t_9 <- t2[which( t2$start >= tstamp_89 ),]

tstamp <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))
t_67 <- t2[which( t2$start <  tstamp[1] ),]
t_89 <- t2[which( t2$start >= tstamp[1] ),]

start_image("bm_reboot_api.png")

par(mfrow=c(2,1))
par(mai=c(.5,.4,.5,.4))
year_hist(t_9, "2009", "2009/06/21", "2010/2/10", 500, 'day', "Daily Reboot Rates")
rows_api <- year_hist_unique(t_9, "2009", "2009/06/21", "2010/2/10", 100, 'day', "Unique Daily Reboots")

#year_hist(t_89, "2008-2009", "2008/01/21", "2010/2/10", 0, 'day', "Daily Reboot Rates")
#rows <- year_hist_unique(t_89, "2008-2009", "2008/01/21", "2010/2/10", 0, 'day', "Unique Daily Reboots")

end_image()

## NOTE: compare api and log data:
start_image("bm_reboot_compare.png", width=960)
par(mfrow=c(1,1))
par(mai=c(1.0,.7,.7,.7))
x <- cbind(rows$reboots, rows_api$reboots)
#barplot(t(x), beside=TRUE, ylim=c(0,150), main="Compare Daily Frequency of Raw-logs & API Events")
barplot(rows$reboots-rows_api$reboots, ylim=c(-40,150), main="Difference between Raw-logs & API Events", xlab="Day", ylab="Difference of Frequency")
end_image()

# it appears that logs come out ahead consistently of the API events.
start_image("bm_reboot_diff_freq.png")
d <- rows$reboots-rows_api$reboots
hist(d[which( d > -10 & d < 20)], breaks=20, main="Frequency of Differences", xlab="Difference")
end_image()

# * why is this so?

###

start_image("reboot_distributions.png")
par(mfrow=c(2,1))
par(mai=c(.5,.5,.5,.5))

typical <- rows$reboots[which(rows$reboots>0 & rows$reboots<50)]
m <- mean(typical)
s <- sd(typical)

qqnorm(typical)
qqline(typical)

h <- hist(typical, breaks=20)
x <- 0:100/100 * 2 * m
y <- dnorm(x, mean=m, sd=s)
lines(x, y*max(h$counts)/max(y))
end_image()

par(mfrow=c(1,1))
par(mai=c(.7,.7,.7,.7))

# ---------------------------------------------------------------------------
# statistics/bm_reboot_unique.r
#
# Compare unique daily reboots between raw BootManager logs (bm_reboot.csv)
# and API events (bm_reboot_2008-12-29.csv): stacked "recency" barplot,
# per-node/per-day reboot images, and CDFs of time-to-reboot.
# ---------------------------------------------------------------------------

source("functions.r")

bm     <- read.csv('bm_reboot.csv', sep=',', header=TRUE)
bm_api <- read.csv('bm_reboot_2008-12-29.csv', sep=',', header=TRUE)

bm2 <- bm

tstamp_78 <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))[1]
tstamp_89 <- unclass(as.POSIXct("2009-01-01", origin="1970-01-01"))[1]

bm_7 <- bm2[which( bm2$start <  tstamp_78 ),]
bm_8 <- bm2[which( bm2$start >= tstamp_78 & bm2$start < tstamp_89 ),]
bm_9 <- bm2[which( bm2$start >= tstamp_89 ),]

tstamp <- unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))
bm_67 <- bm2[which( bm2$start <  tstamp[1] ),]
bm_89 <- bm2[which( bm2$start >= tstamp[1] ),]

#start_image("bm_reboot.png")

par(mfrow=c(2,1))
par(mai=c(.5,.4,.5,.4))
#year_hist(bm_9, "2009", "2009/06/21", "2010/2/10", 500, 'day', "Daily Reboot Rates")
#rows <- year_hist_unique(bm_9, "2009", "2009/06/21", "2010/2/10", 100, 'day', "Unique Daily Reboots")
#end_image()

if ( TRUE )
{
    # Bucket each day's unique rebooting hosts by how recently they last
    # rebooted (today only, or within 1/3/7/14/30 days).
    rows_blocks <- year_hist_unique_recent(bm_9, "2009", "2009/06/21", "2010/2/10", 100, c(1,3,7,14,30), 'day', "Unique Daily Reboots")

    # Column names produced by year_hist_unique_recent: "X0", "X1", "X3", ...
    # (vectorized paste replaces the original accumulate-in-a-loop form).
    blocks <- c(0,1,3,7,14,30)
    x <- paste("X", blocks, sep="")

    par(mfrow=c(1,1))
    par(mai=c(1,.7,.5,.4))
    start_image("bm_reboot_color.png", width=900)

    barplot(t(rows_blocks[x]), border=NA, col=c('purple', 'blue', 'green', 'red', 'pink', 'orange', 'yellow'), ylim=c(0,100), main="How Recently Node were Rebooted", xlab="Days from June-2009 to Jan-2010", space=0, legend=c("Only today", "Also within 1 day", "Also within 3 days", "Also within 7 days", "Also within 14 days", "Also within 30 days"), ylab="Frequency")
    end_image()

    # Disabled alternates: one barplot per recency bucket.
    #par(mfrow=c(6,1))
    #par(mai=c(.1,.7,.1,.1))
    #barplot(rows_blocks$X0, border=NA, col=c('purple'), ylim=c(0,100))
    #barplot(rows_blocks$X1, border=NA, col=c('blue'), ylim=c(0,100))
    #barplot(rows_blocks$X3, border=NA, col=c('green'), ylim=c(0,100))
    #barplot(rows_blocks$X7, border=NA, col=c('red'), ylim=c(0,100))
    #barplot(rows_blocks$X14, border=NA, col=c('pink'), ylim=c(0,100))
    #barplot(rows_blocks$X30, border=NA, col=c('orange'), ylim=c(0,100))

    # Normality checks on the "typical" (< 50) portion of each bucket.
    shapiro.test(rows_blocks$X0[ rows_blocks$X0 < 50 ])
    shapiro.test(rows_blocks$X1[ rows_blocks$X1 < 50 ])
    shapiro.test(rows_blocks$X3[ rows_blocks$X3 < 50 ])
    shapiro.test(rows_blocks$X7[ rows_blocks$X7 < 50 ])
    shapiro.test(rows_blocks$X14[ rows_blocks$X14 < 50 ])
    shapiro.test(rows_blocks$X30[ rows_blocks$X30 < 50 ])
}

#image <- reboot_image(t_9, "2009", "2009/06/21", "2010/2/10", 0, 'day')
#myImagePlot(image)

start_image("st_bm_reboots.png", width=400, height=600)
image <- reboot_image(bm_9, "2009", "2009/06/21", "2010/2/10", 0, 'day', title="BootManager Reboots for all Nodes")
end_image()

start_image("st_api_event_reboots.png", width=800, height=600)
image2 <- reboot_image(bm_api, "2009", "2008/06/21", "2010/2/10", 0, 'day', title= "API Reboot Events for all Nodes")
end_image()

# Total reboots per node: row sums of the node x day 0/1 matrix.
# (Replaces the original manual seq(1:d[1]) accumulation loop.)
reboot_frequency <- function ( img )
{
    return (rowSums(img))
}

# Total reboots per day: column sums of the node x day 0/1 matrix.
reboot_events <- function ( img )
{
    return (colSums(img))
}

# For each node (row of img), collect the gaps (in days) between successive
# runs of reboot events within columns [first, last], pooled across nodes.
# last=0 means "to the end of the row".  Note R's x[0:n] silently drops the
# 0 index, so the first=0 default behaves like first=1.
time_to_reboot <- function (img, first=0, last=0)
{
    d <- dim(img)
    f <- NULL
    for ( i in seq_len(d[1]) )
    {
        if (last == 0 ) { last <- length(img[i,]) }
        r <- img[i,first:last]
        # advance to the first reboot in the window
        start_i <- 1
        while ( start_i < length(r) && r[start_i] != 1 )
        {
            start_i <- start_i + 1
        }
        end_i <- start_i

        # each time a new run of 1s begins, record the distance from the
        # end of the previous run
        while ( start_i < length(r) )
        {
            if ( r[start_i] == 1 && start_i != end_i)
            {
                f <- c(f, start_i-end_i)
                while ( start_i < length(r) && r[start_i] == 1 ) { start_i <- start_i + 1 }
                end_i <- start_i
            }
            start_i <- start_i + 1
        }
    }
    return (f)
}

# Binary-search the integer domain of an ecdf() function for the lowest
# point at or past the 95th percentile.  (The original computed unused
# c_low/c_high/c_min values and a redundant `else if`; both removed --
# the search behavior is unchanged.)
find_95 <- function (cdf, low=0, high=1000)
{
    # find the lowest point past the 95th percentile.
    while ( high - low > 1)
    {
        c_mid <- cdf(low+floor((high-low)/2))

        if ( c_mid > 0.95 ) {
            high <- high - floor((high-low)/2)
            print (sprintf("adjust high: %s\n", high))
        } else {
            low <- low + floor((high-low)/2)
            print (sprintf("adjust low: %s\n", low))
        }
    }
    return (low)
}

# Column ranges below skip gaps in data collection:
#0,193-402,length(r)
ttr1 <- time_to_reboot(image,9,122)
ttr2 <- time_to_reboot(image,131,223)

ttr8 <- time_to_reboot(image2,0,193)
ttr9 <- time_to_reboot(image2,402)

x1 <- ecdf(c(ttr1, ttr2))
x2 <- ecdf(c(ttr8,ttr9))
start_image("reboot_ttr_cdf.png")
plot(x1, col.vert='red', col.hor="red", col.points="red", pch='*', xlab="Days to Reboot", ylab="Percentile", verticals=TRUE, xlim=c(0,170), main="CDF of Days to Reboot for BM & API Events")
plot(x2, col.vert='blue', col.hor="blue", col.points="blue", pch=20, verticals=TRUE, add=TRUE)
legend(130, 0.15, legend=c("BM Uploads", "API Events"), col=c('red', 'blue'), pch=c(42, 20))
abline(0.95,0)
v1<-find_95(x1)
v2<-find_95(x2)
abline(v=v1, col="pink")
abline(v=v2, col="light blue")
axis(1, labels=c(v1,v2), at=c(v1,v2))

# reference gridlines at weekly multiples
abline(v=7, col="grey")
abline(v=14, col="grey")
abline(v=21, col="grey")
abline(v=28,
col="grey")  # completes abline(v=28, ...) begun on the previous line
abline(v=42, col="grey")
abline(v=56, col="grey")
end_image()

# CDF of reboots-per-day, BM logs vs. API events.
e <- reboot_events(image)
e2 <- reboot_events(image2)
x1 <- ecdf(e)
x2 <- ecdf(e2)

start_image("reboot_days_cdf.png")
plot(x1, col.vert='red', col.hor="red", col.points="red", pch='*', xlab="Reboots in a Single Day", ylab="Percentile", verticals=TRUE, xlim=c(0,100), main="CDF of Reboots per Day for BM & API Events")
plot(x2, col.vert='blue', col.hor="blue", col.points="blue", pch=20, verticals=TRUE, add=TRUE)
legend(75, 0.15, legend=c("BM Uploads", "API Events"), col=c('red', 'blue'), pch=c(42, 20))
abline(0.95,0)
v1<-find_95(x1)
v2<-find_95(x2)
abline(v=v1, col="pink")
abline(v=v2, col="light blue")
axis(1, labels=c(v1,v2), at=c(v1,v2))
end_image()

# CDF of reboots-per-node, BM logs vs. API events.
f <- reboot_frequency(image)
f2 <- reboot_frequency(image2)
x1 <- ecdf(f)
x2 <- ecdf(f2)

start_image("reboot_node_cdf.png")
par(mfrow=c(1,1))
par(mai=c(.9,.8,.5,.4))
plot(x1, col.vert='red', col.hor="red", col.points="red", pch='*', xlab="Reboots per Node", ylab="Percentile", verticals=TRUE, xlim=c(0,100), main="CDF of Reboot per Node for BM & API Events")
plot(x2, col.vert='blue', col.hor="blue", col.points="blue", pch=20, verticals=TRUE, add=TRUE)
legend(75, 0.15, legend=c("BM Uploads", "API Events"), col=c('red', 'blue'), pch=c(42, 20))
abline(0.95,0)
v1<-find_95(x1)
v2<-find_95(x2)
abline(v=v1, col="pink")
abline(v=v2, col="light blue")
axis(1, labels=c(v1,v2), at=c(v1,v2))
end_image()

par(mfrow=c(1,1))
par(mai=c(.7,.7,.7,.7))

# ---------------------------------------------------------------------------
# statistics/functions.r
#
# Shared helpers: slice-count scoring models, histogram/plot helpers, and
# date utilities used by the bm_reboot*, node_history, prep and rt_data
# scripts.
# ---------------------------------------------------------------------------

# Score a node's slice capacity from memory (GB), disk (GB, scaled by 250)
# and cpu speed x core count.  With components=TRUE the individual terms
# are returned instead of their sum.
slices <- function (x, components=FALSE)
{
    m <- x$memsize
    d <- x$disksize/250
    c <- x$cpuspeed
    r <- x$numcores
    if ( components ) {
        a <- c(m,d,c*r)
    } else {
        a <- (m+d+c*r)
    }
    return (a/2)
}

# Like slices(), but each measurement is scaled against an "ideal" node so
# each term is roughly 1 for ideal hardware.
slices_2 <- function (x, components=FALSE)
{
    # Define an ideal, then scale each measurement relative to the ideal.
    # If it matches it will be more or less than 1
    # does this scale (up or down) linearly, and why not?

    # 4, 2.4x2, 1000; 4, 3.2x1, 320; 1, 2.4x1, 160
    ideal_m <- 3.4  # GB
    ideal_c <- 2.4  # GHz
    ideal_d <- 450  # GB
    ideal_r <- 2

    m <- x$memsize/ideal_m
    d <- x$disksize/ideal_d
    c <- x$cpuspeed/ideal_c
    r <- x$numcores/ideal_r
    # ideal is 1

    if ( components ) {
        a <- c(m,d,c*r)
    } else {
        a <- (m+d+c*r)
    }

    return (a/3*5)
}

# slices_2 plus a log-scaled bandwidth term.
slices_3 <- function (x, components=FALSE)
{
    ideal_m <- 3.4     #GB
    ideal_c <- 2.4     #GHz
    ideal_d <- 450     #GB
    ideal_r <- 2
    ideal_bw <- 100000 #Kbps

    m <- x$memsize/ideal_m
    d <- x$disksize/ideal_d
    c <- x$cpuspeed/ideal_c
    r <- x$numcores/ideal_r
    b <- log(x$bwlimit)/log(ideal_bw)
    # ideal is 1

    if ( components ) {
        a <- c(m,d,c*r,b)
    } else {
        a <- (m+d+c*r+b)
    }

    return (a/4*5)
}

# slices_3 plus a PCU-status term.
slices_4 <- function (x, components=FALSE)
{
    ideal_m <- 3.4     #GB
    ideal_c <- 2.4     #GHz
    ideal_d <- 450     #GB
    ideal_r <- 2
    ideal_bw <- 100000 #Kbps
    ideal_pcu <- 1

    m <- x$memsize/ideal_m
    d <- x$disksize/ideal_d
    c <- x$cpuspeed/ideal_c
    r <- x$numcores/ideal_r
    b <- log(x$bwlimit)/log(ideal_bw)
    p <- x$pcustatus/ideal_pcu
    # ideal is 1

    if ( components ) {
        a <- c(m,d,c*r,b,p)
    } else {
        a <- (m+d+c*r+b,p)
    }

    return (a/5*5)
}

# Return the 1-based bin index of `value` within histogram h's breaks.
# Values below the first break map to bin 1; values above the last break
# map to the last bin.  Warns if no bin could be assigned.
index_of_bin <- function (h, value)
{
    index <- 0

    for (i in seq_along(h$breaks))
    {
        # first bin
        if ( value < h$breaks[1] )
        {
            index <- 1
            break
        }

        # last bin
        if ( i == length(h$breaks) )
        {
            # end of line
            index <- i
            break
        }

        # all other bins
        if ( value > h$breaks[i] && value <= h$breaks[i+1] )
        {
            index <- i+1
            break
        }
    }
    if ( index == 0 ) {
        warning("index == 0, no bin assigned for value: ", value)
    }

    return (index)
}

# Open a PNG device; paired with end_image().
start_image <- function (name, width=480, height=480)
{
    png(name, width=width, height=height)
}

# Close the current graphics device.
end_image <- function ()
{
    dev.off()
}

# Histogram of log(time-to-final-reply) for RT tickets, with a scaled
# normal overlay.  If imagename is given (non-zero), render to that PNG.
plot_rt_hist <- function (t, imagename=0)
{
    d2 <- (t$lastreply - t$start)
    std_dev <- sd(log(d2))
    m <- mean(log(d2))
    print(sprintf("mean: %s, stddev: %s\n", m, std_dev))

    if ( imagename != 0 ) { start_image(imagename) }

    h <- hist(log(d2),
              xlab="Hours between ticket creation and final reply",
              main="Time to Final Reply for RT Tickets", axes=FALSE)

    a <- exp(h$breaks)/(60*60) # convert units from log(secs) to hours
    axis(1,labels=signif(a,2), at=h$breaks)
    axis(2)

    x <- seq(min(h$breaks),max(h$breaks),length=500)
    y <- dnorm(x,mean=m, sd=std_dev)

    # scale y to the size of h's 'counts' vector rather than the density function
    lines(x,y*max(h$counts)/max(y))
    if ( imagename != 0 ) { end_image() }
}

# Histogram of event counts per `type` interval between `from` and `to`.
# max=0 auto-scales the y axis to the tallest bar.
year_hist <- function (t, year, from, to, max, type="week", title="Histogram for Tickets in")
{
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, "%b-%d")
    hbreaks <- unclass(as.POSIXct(dates))
    h <- hist(t$start, breaks=hbreaks, plot=FALSE)
    main <- sprintf(paste(title, "%s: MEAN %s\n"), year, mean(h$counts))
    print(main)
    if ( max == 0 ) {
        max = max(h$counts)
    }
    plot(h, ylim=c(0,max), main=main, axes=FALSE)
    axis(1, labels=months, at=hbreaks)
    axis(2)
    abline(mean(h$counts), 0, col='grey')
    #qqnorm(h$counts)
    #qqline(h$counts)
}

# Barplot of UNIQUE hosts seen per day; returns a data.frame with columns
# 'start' (epoch seconds) and 'reboots' (unique host count that day).
year_hist_unique <- function (t, year, from, to, max, type="week", title="Histogram for Tickets in")
{
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, "%b-%d")
    hbreaks <- unclass(as.POSIXct(dates))

    rows <- NULL
    for ( d in hbreaks )
    {
        d_end <- d+60*60*24
        t_sub <- t[which(t$start > d & t$start <= d_end),]
        rows <- rbind(rows, c('start'=d, 'reboots'=length(unique(t_sub$hostname))) )
    }
    rows <- data.frame(rows)

    if ( max == 0 ) {
        max = max(rows$reboots)
    }
    main <- sprintf(paste(title, "%s: MEAN %s\n"), year, mean(rows$reboots))
    print(main)
    barplot(rows$reboots, ylim=c(0,max), main=main, axes=FALSE, space=0)
    axis(1, labels=months, at=seq(1,length(hbreaks)))
    axis(2)
    abline(mean(rows$reboots), 0, col='grey')
    return (rows)
}

# Like year_hist_unique(), but for each day also classifies each rebooting
# host by how recently it previously rebooted.  Returns a data.frame with a
# 'start' column plus one count column per recency bucket: "0" (no prior
# reboot within any window; becomes X0 in the data.frame) and one per entry
# of `blocks` (in days).  A host is counted in the SMALLEST window that
# contains a prior reboot.
year_hist_unique_recent <- function (t, year, from, to, max, blocks=c(1,3,7,14,30), type="week", title="Histogram for Tickets in")
{
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, "%b-%d")
    hbreaks <- unclass(as.POSIXct(dates))

    rows <- NULL

    for ( d in hbreaks )
    {
        # initialize row for this iteration
        row <- NULL
        row[as.character(0)] <- 0
        for ( block in blocks ) {
            row[as.character(block)] <- 0
        }

        # find the range : d plus a day
        d_end <- d+60*60*24
        # find unique hosts in this day range
        t_sub <- t[which(t$start > d & t$start <= d_end),]
        unique_hosts <- unique(t_sub$hostname)
        if (length(unique_hosts) == 0 ) {
            rows <- rbind(rows, c('start'=d, row))
            next
        }

        #print(sprintf("unique_hosts: %s\n", unique_hosts));
        print(sprintf("unique_hosts: %s\n", length(unique_hosts)))

        for ( host in as.character(unique_hosts) )
        {
            found <- 0
            for ( block in blocks )
            {
                # find the range : 'block' days ago to 'd'
                d_back <- d - 60*60*24 * block
                t_back_sub <- t[which(t$start > d_back & t$start <= d),]
                u <- unique(t_back_sub$hostname)
                if ( length(u[u==host]) >= 1)
                {
                    # add to block_count and go to next host.
                    found <- 1
                    i <- as.character(block)
                    row[i] <- row[i] + 1
                    break
                }
            }
            if ( found == 0 )
            {
                # no range found
                row['0'] <- row['0'] + 1
            }
        }
        rows <- rbind(rows, c('start'=d, row))
    }

    rows <- data.frame(rows)

    if ( max == 0 ) {
        max = max(rows['0'])
    }
    # (plotting code from year_hist_unique intentionally disabled here;
    # callers plot rows_blocks themselves.)
    return (rows)
}

source("myImagePlot.R")

# Build a node x day 0/1 matrix (1 = that node rebooted that day) and
# render it with myImagePlot(); returns the matrix.
reboot_image <- function (t, year, from, to, max=0, type="week", title="")
{
    dates <- seq(as.Date(from), as.Date(to), type)
    months <- format(dates, "%b-%d")
    hbreaks <- unclass(as.POSIXct(dates))

    rows <- NULL
    # Rows are sized to the hostname factor's largest integer code.
    image <- matrix(data=0, nrow=max(as.numeric(t$hostname)), ncol=length(hbreaks))
    #image <- matrix(data=0, nrow=length(unique(t$hostname)), ncol=length(hbreaks))

    for ( i in seq_along(hbreaks) )
    {
        # find the range : d plus a day
        d <- hbreaks[i]
        d_end <- d+60*60*24
        # find unique hosts in this day range
        t_sub <- t[which(t$start > d & t$start <= d_end),]
        unique_hosts <- unique(t_sub$hostname)
        if (length(unique_hosts) == 0 ) { next }

        for ( host in unique_hosts )
        {
            # NOTE(review): this relies on `host` indexing the matrix by the
            # hostname factor's integer code (consistent with the nrow
            # choice above) -- confirm against the R version in use, since
            # a character hostname would fail here.
            image[host,i] <- 1
        }
    }

    myImagePlot(image, xLabels=months, yLabels=c(""), title=title)

    # (large block of commented-out bucketing code from an earlier draft
    # removed; see year_hist_unique_recent for the working version.)
    return (image)
}

# Append a 'year' column derived from the epoch-seconds 'start' column.
add_year <- function (t)
{
    t$year <- c(0) # assign new column with zero value initially
    for ( i in seq_along(t$start) )
    {
        d <- as.POSIXlt(t$start[i], origin="1970-01-01")
        year <- d$year + 1900 # as.numeric(format(d, "%Y"))
        t$year[i] <- year
    }
    return (t)
}

# Append a 'start' column of epoch seconds parsed from the 'date' column.
add_timestamp <- function (t)
{
    t$start <- c(0) # assign new column with zero value initially
    for ( i in seq_along(t$date) )
    {
        tstamp <- unclass(as.POSIXct(t$date[i], origin="1970-01-01"))[1]
        t$start[i] <- tstamp
    }
    return (t)
}

# Draw a vertical line at the given date (parsed with `format`) and return
# its epoch timestamp so callers can position labels.
abline_at_date <- function (date, col='black', lty=1, format="%Y-%m-%d")
{
    ts <- unclass(as.POSIXct(date, format=format, origin="1970-01-01"))[1]
    abline(v=ts, col=col, lty=lty)
    return (ts)
}
# ---------------------------------------------------------------------------
# statistics/node_history_may0809.r
#
# Plot registered vs. available node counts (May 2008 - May 2009), with
# vertical markers for kernel updates and MyOps events.
# ---------------------------------------------------------------------------

source("functions.r")

# data collected from M2 pickle files
dnc <- read.csv('daily-available-node-count.csv', sep=',', header=TRUE)

dnc2 <- add_timestamp(dnc)

tstamp_08 <- unclass(as.POSIXct("2008-05-07", origin="1970-01-01"))[1]
dnc2 <- dnc2[which( dnc2$start > tstamp_08 ),]

dates <- seq(as.Date('2008-05-07'), as.Date('2009-05-07'), 'week')
months <- format(dates, "%b")
hbreaks <- unclass(as.POSIXct(dates))

x_start <- unclass(as.POSIXct("2008-05-07", origin="1970-01-01"))[1]
x_end   <- unclass(as.POSIXct("2009-06-1", origin="1970-01-01"))[1]

start_image("daily-node-count.png")
# Plot only rows where 'available' is known so both series cover the same
# days (hoisted: the original recomputed this which() four times).
ok <- which(!is.na(dnc2$available))
plot(dnc2$start[ok], dnc2$registered[ok],
     type='l', col='blue', ylim=c(0,900), xlim=c(x_start, x_end),
     xlab="Date", ylab="Node Count", axes=FALSE)
lines(dnc2$start[ok], dnc2$available[ok], type='l', col='red', ylim=c(0,900))
axis(2)
axis(1, labels=months, at=hbreaks)

tstamp_0610 <- abline_at_date("2008-06-10", col='grey20', lty=2)
# dates taken from reboot_image() output for API events.
tstamp_0815 <- abline_at_date("2008-08-15", col='grey20', lty=2)
tstamp_0905 <- abline_at_date("2008-09-05", col='grey70')
tstamp_0924 <- abline_at_date("2008-09-24", col='grey20', lty=2)
tstamp_1015 <- abline_at_date("2008-10-15", col='grey20', lty=2)
tstamp_1105 <- abline_at_date("2008-11-05", col='white', lty=2)
tstamp_1214 <- abline_at_date("2008-12-14", col='grey70')
tstamp_0223 <- abline_at_date("2009-02-23", col='grey70')
tstamp_0313 <- abline_at_date("2009-03-13", col='grey70')

# Label the midpoint of each marked interval along the x axis.
text(x=c(tstamp_0610+(tstamp_0815-tstamp_0610)/2,
         tstamp_0815+(tstamp_0905-tstamp_0815)/2,
         tstamp_0924+(tstamp_1015-tstamp_0924)/2,
         tstamp_1015+(tstamp_1105-tstamp_1015)/2,
         tstamp_1214+(tstamp_0223-tstamp_1214)/2,
         tstamp_0223+(tstamp_0313-tstamp_0223)/2),
     y=c(0),
     labels=c("Kernel bug", 'fix1', 'fix2', 'fix3', 'Notice bug', 'fix4')) #, 'fix 2', 'fix 3', 'fix 4'))

# FIX: merge=T -> merge=TRUE (T is an ordinary, reassignable variable).
legend(unclass(as.POSIXct("2009-03-13", origin="1970-01-01"))[1], 200,
       cex=0.7,
       legend=c("Registered", "Available", 'Kernel Update', 'MyOps Event'),
       pch=c('-', '-', '-', '-'),
       col=c('blue', 'red', 'grey20', 'grey70'),
       lty=c(1, 1, 2, 1), merge=TRUE)

end_image()

# ---------------------------------------------------------------------------
# statistics/prep.r
#
# Exploratory plots of node hardware resources and derived "slice count"
# scores.  Input paths are hard-coded to the author's workstation.
# ---------------------------------------------------------------------------

source("functions.r")

ikern <- read.csv("/Users/soltesz/Downloads/out.csv", TRUE, sep=",")
# kernel_version recoded as an integer-labelled factor (1..n, sorted order)
f <- factor(ikern$kernel_version, sort(unique(ikern$kernel_version)), sequence(length(unique(ikern$kernel_version))))

u <- ikern$uptime/(60*60*24)  # uptime in days

current_time <- as.numeric(format(Sys.time(), "%s"))
i <- (current_time-ikern$install_date)/(60*60*24)  # install age in days

plot(f,u)

sites <- read.csv("/Users/soltesz/Downloads/sites.csv", TRUE, sep=",")
f <- factor(sites$status, sort(unique(sites$status)), sequence(length(unique(sites$status))))

s <- sites$sliver_count

res <- read.csv("/Users/soltesz/Downloads/out_resources.csv", TRUE, sep=",")
library(lattice)
# 3-D cloud of memory vs disk x cpu, panelled by core count
cloud(memsize ~ disksize *
cpuspeed|numcores, data=res)  # completes cloud(memsize ~ disksize * ...)

# pairwise scatter of resource columns 2, 4, 5
x <- c(res[2],res[4],res[5])
pairs(x)

mdrc <- read.csv("/Users/soltesz/Downloads/out_resources.csv", TRUE, sep=",")

stripchart(round(slices(mdrc)), method="jitter")
hist(round(slices(mdrc)),breaks=30)

hist(round(slices(mdrc)),breaks=30,xlim=c(0,32))
stripchart(round(slices(mdrc)), method="jitter", add=TRUE, jitter=30, at=50)

# margins are: bottom, left, top, right
par(mai=c(0,1,0.5,0.2))
hist(round(slices(mdrc)),breaks=30,xlim=c(0,32))
par(mai=c(1.0,1,0.5,0.2))
stripchart(round(slices(mdrc))-0.5, method="jitter", jitter=20, xlim=c(0,32), ylim=c(-25,25), ylab="Raw Samples", xlab="Slice count as a function of Mem, CPU, Disk")

png("/Users/soltesz/Downloads/slices.png")
par(mfrow=c(2,1))
par(mai=c(0,1,0.5,0.2))
hist(round(slices(mdrc)),breaks=30,xlim=c(0,32), main="Distribution of Slice Count as Function of Mem, CPU, Disk")
par(mai=c(1.0,1,0.5,0.2))
stripchart(round(slices(mdrc))-0.5, method="jitter", jitter=20, xlim=c(0,32), ylim=c(-25,25), ylab="Raw Samples", xlab="Slice count as a function of Mem, CPU, Disk for live Planetlab Machines")
dev.off()

#-----------------------

# Select the scoring model; the LAST assignment wins (slices_2 is active).
f <- slices
f <- slices_2

s2 <- f(mdrc, FALSE)
mdrc$score <- s2
df <- data.frame(mdrc)
b <- 30  # histogram break count used throughout

# ----------------------
### LOGINBASE
# Aggregate scores and per-component values by site (loginbase).
unique_loginbase_length <- length(unique(mdrc$loginbase))
unique_lb <- list(loginbase=array(0,c(unique_loginbase_length)),
                  score=array(0,c(unique_loginbase_length)),
                  memsize=array(0,c(unique_loginbase_length)),
                  disksize=array(0,c(unique_loginbase_length)),
                  cpuspeed=array(0,c(unique_loginbase_length))
                  )

# Pass 1: total score per loginbase.
for ( i in seq_along(mdrc$loginbase) )
{
    r <- mdrc[i,]
    v <- f(r, TRUE)  # NOTE(review): v is unused in this first pass
    unique_lb$loginbase[r$loginbase] <- r$loginbase
    unique_lb$score[r$loginbase] <- unique_lb$score[r$loginbase] + r$score
}

# Pass 2: per-component totals per loginbase.
for ( i in seq_along(mdrc$loginbase) )
{
    r <- mdrc[i,]
    v <- f(r, TRUE)
    rscore <- unique_lb$score[r$loginbase]
    unique_lb$memsize[r$loginbase]  <- unique_lb$memsize[r$loginbase] + v[1]
    unique_lb$disksize[r$loginbase] <- unique_lb$disksize[r$loginbase] + v[2]
    unique_lb$cpuspeed[r$loginbase] <- unique_lb$cpuspeed[r$loginbase] + v[3]
}

df <- data.frame(unique_lb)

h <- hist(df$score, breaks=b)
bins <- max(length(h$breaks),max(h$breaks))
c <- array(0,c(bins))
d <- array(0,c(bins))
m <- array(0,c(bins))
# foreach score value, find which range it falls into,
# then in three columns for cpu, mem, disk, record the fraction of each.
# then plot each sequence in a stacked graph, perhaps beside h$counts
for ( i in seq_along(df$cpuspeed) )
{
    r <- df[i,]
    s <- index_of_bin(h, r$score)  # find bin position...
    # take fraction that each component contributes to the total, and add to sum
    m[s] <- m[s] + unique_lb$memsize[r$loginbase]
    d[s] <- d[s] + unique_lb$disksize[r$loginbase]
    c[s] <- c[s] + unique_lb$cpuspeed[r$loginbase]
}

# ----------------------
### HOSTS
# --- get plot of contributing parts
h <- hist(df$score, breaks=b)
bins <- max(length(h$breaks),max(h$breaks))
c <- array(0,c(bins))
d <- array(0,c(bins))
m <- array(0,c(bins))
# foreach score value, find which range it falls into,
# then in three columns for cpu, mem, disk, record the fraction of each.
for ( i in seq_along(df$cpuspeed) )
{
    r <- df[i,1:6]
    s <- index_of_bin(h, r$score)  # find bin position...
    # take fraction that each component contributes to the total, and add to sum
    v <- f(r, TRUE)
    m[s] <- m[s] + v[1]/r$score
    d[s] <- d[s] + v[2]/r$score
    c[s] <- c[s] + v[3]/r$score
}

#a <- array(c(c,d,m), dim=c(bins, 3));
a <- array(c(c), dim=c(bins, 3))

#png("/Users/soltesz/Downloads/slice_policy_1.png")
par(mfrow=c(2,1))
par(mai=c(0.5,1,0.5,0.2))
barplot(c(0,h$counts),
        xlab="slice count",
        main="Distribution of Per-node 'Scores' Calculated from Mem/Disk/CPU",
        ylab="Total Frequency",
        ylim=c(0,160))
par(mai=c(1.0,1,0,0.2))
barplot(t(a),
        legend=c("CPUspeed (GHz)", "DISKsize (GB)", "MEMsize (GB)"),
        col=c("pink", "lightblue", "lightgreen"),
        ylim=c(0,160),
        ylab="Total with Break-down",
        xlab="Per-node Score",
        names.arg=h$breaks,
)
#dev.off()

#a <- list(cpuspeed=c, memsize=m, disksize=d);
# barplot(t(a), legend=c("cpuspeed", "disksize", "memsize"), col = c("pink", "lightblue", "lightgreen"), ylab="Total Contribution by CPU, Disk, Mem ")

# ---------------------------------------------------------------------------
# statistics/rpm_dist.r
#
# Distribution of installed planetlab RPM package combinations across nodes.
# ---------------------------------------------------------------------------

# Data collection (run outside R):
#system("URL='https://monitor.planet-lab.org:443/monitor/query?object=nodes&nodehistory_hostname=&hostname=on&observed_status=on&rpms=on&rpmvalue=planetlab&tg_format=plain'; curl -s --insecure $URL | grep -v DOWN | grep -v DEBUG | /usr/share/monitor/statistics/hn2rpms.py > out_rpm.csv");
#system("grep MD5SUMS /usr/share/monitor/monitor.log | grep -v measurement-lab | awk 'BEGIN { printf \"hostname,yumsum\\n\" } {if ( $3 != \"\") { printf \"%s,%s\\n\", $2,$3 } }' > yumsum.csv")

r <- read.csv("out_rpm.csv")
ys <- read.csv('yumsum.csv')
m <- merge(r,ys, by="hostname")

s <- table(factor(r$NodeManager), factor(r$kernel), factor(r$iptables))
plot(s)

# Reference ("ideal") package versions per component.
ideal <- c(NodeManager='NodeManager-1.8-12.planetlab.1',
           NodeUpdate='NodeUpdate-0.5-4.planetlab',
           codemux='codemux-0.1-13.planetlab',
fprobe.ulog='fprobe-ulog-1.1.3-0.planetlab', + ipod='ipod-2.2-1.planetlab', + iproute='iproute-2.6.16-2.planetlab', + iptables='iptables-1.3.8-9.planetlab', + kernel='kernel-2.6.22.19-vs2.3.0.34.39.planetlab', + madwifi='madwifi-0.9.4-2.6.22.19.3.planetlab', + monitor.client='monitor-client-3.0-17.planetlab', + monitor.runlevelagent='monitor-runlevelagent-3.0-17.planetlab', + pl_mom='pl_mom-2.3-1.planetlab', + pl_sshd='pl_sshd-1.0-11.planetlab', + pyplnet='pyplnet-4.3-3.planetlab', + util.vserver.pl='util-vserver-pl-0.3-17.planetlab', + vserver.planetlab.f8.i386='vserver-planetlab-f8-i386-4.2-12.2009.06.23', + vserver.systemslices.planetlab.f8.i386='vserver-systemslices-planetlab-f8-i386-4.2-12.2009.06.23', + vsys='vsys-0.9-3.planetlab', + vsys.scripts='vsys-scripts-0.95-11.planetlab'); + +r_summary <- lapply(r[,4:23], summary) +for (i in 1:length(r_summary)) +{ + n<-sort(unlist(r_summary[i]), decreasing=TRUE) + names(n[1]) +} + +as.numeric(factor(ideal[1], levels(r$NodeManager))) + +cv <- function ( row , rows=566, start_col=4, end_col=23, ref=NULL) +{ + ret<-NULL; + for ( i in 1:rows ) + { + r_l <-NULL + for ( name in names(row) ) + { + # NOTE: this doesn't work unless the levels in row are a subset of ref's levels. 
+ x<-as.numeric(factor(row[i,name], levels(factor(unlist(row[name]))))); + r_l <- c(r_l, x); + } + #r<-as.numeric(row[i,start_col:end_col]); + str<- paste(as.character(r_l), collapse="-", sep="-"); + ret<- rbind(ret, str); + } + return (ret); +} + +grow <- function (d, column, val) +{ + r <- which(d[column] == val); + return (d[r,]); +} + +cv(m, length(m$hostname)); +i<-data.frame(t(ideal)); +cv(i, 1, 1, length(ideal)); + + # --- + +x<-cv(r, length(r$hostname)) +x2<-factor(x) +# plot the frequency of each RPM package combination +barplot(sort(table(x2), decreasing=TRUE), + ylim=c(0, max(table(x2))), + xlab="Unique Package Combinations", + ylab="Frequency", + axisnames=FALSE, + main=paste("Distribution of Packages for", length(r$hostname),"nodes")); + +png("/Users/soltesz/Downloads/rpm_plpackages_distribution_1.png", + width=640, + height=300, + unit="px") +# 1x1 grid, with 1" margins on the bottom/left, 0.5" on the top/right +par(mfrow=c(1,1)); +par(mai=c(1,1,0.5,0.5)); +barplot(sort(table(x2), decreasing=TRUE), + ylim=c(0, max(table(x2))), + xlab="Unique Package Combinations", + ylab="Frequency", + axisnames=FALSE, + main=paste("Distribution of Packages for", length(r$hostname),"nodes")); +dev.off() + + + +#convert_rpm <- function ( row ) +#{ +# c <- as.character(row$rpms) +# rpm_list <- unlist(strsplit(c, " ")) +# rpm_sort <- paste(sort(rpm_list), collapse="::"); +# return (rpm_sort); +#} + +#s<-convert_rpm(r) + +#for ( row in r[,] ) +#{ +# c <- as.character(row$rpms) +# rpm_list <- unlist(strsplit(c, " ")) +# row$rpm_sort <- paste(sort(rpm_list), collapse="::"); +# +# #for ( rpm in rpm_list ) +# #{ +# # fields <- unlist(strsplit(rpm, "-")); +# # s <- sort(fields); +# #} +#} +# +#s<-sort(rpm_list); + + diff --git a/statistics/rt_data.r b/statistics/rt_data.r new file mode 100644 index 0000000..9ba227b --- /dev/null +++ b/statistics/rt_data.r @@ -0,0 +1,415 @@ + + +source("functions.r"); + +# system("parse_rt_data.py 3 > rt_data.csv"); +#t <- 
read.csv('rt_data.csv', sep=',', header=TRUE)
+# NOTE(review): the line above is an orphaned continuation of the commented-out
+# "#t <-" on the previous line (the comment got split); as written it reads
+# rt_data.csv and discards the result -- TODO confirm against the original patch.
+# NOTE(review): naming the data frame `t` shadows base::t(); calls like t(x)
+# still resolve to the transpose function, but it is confusing to read.
+t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
+
+par(mfrow=c(2,1))
+
+# Exploratory: iterated-log transforms of the per-ticket reply counts.
+# log(log(x)) is NaN/-Inf for small counts; hist() drops non-finite values
+# with a warning -- presumably intentional here.
+h<-hist(log(log(t$replies)), breaks=50)
+lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
+h<-hist(log(log(log(t$replies))), breaks=50)
+lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
+
+
+par(mfrow=c(1,1))
+
+# Keep only resolved tickets; d = hours from creation to last reply.
+t2 <- t[which(t$complete == 1),]
+d <- (t2$lastreply - t2$start)/(60*60)
+
+#start_image("rt_hist_ttc_1000.png")
+#hist(d[which(d<1000)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_200.png")
+#hist(d[which(d<200)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_50.png")
+#hist(d[which(d<50)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_10.png")
+#hist(d[which(d<10)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#d2 <- (t2$lastreply - t2$start)
+#h<-hist(log(d2), plot=F, breaks=50)
+#lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
+
+
+# this doesn't work as I would like. 
I think the bins aren't as I expect
+#h <- hist(d, plot=F, breaks=c(seq(0,max(d)+1, .1)))
+#plot(h$counts, log="x", pch=20, col="blue",
+# main="Log-normal distribution",
+# xlab="Value", ylab="Frequency")
+
+#plot(log(d2))
+#plot(ecdf(d2))
+
+# Year-boundary timestamps (seconds) for bucketing tickets by start year.
+# NOTE(review): as.POSIXct() ignores `origin=` for character input, and
+# "1960-01-01" is not the Unix epoch (1970-01-01) in any case -- the argument
+# is harmless here but misleading and could simply be dropped.
+tstamp_45 <-unclass(as.POSIXct("2005-01-01", origin="1960-01-01"))[1]
+tstamp_56 <-unclass(as.POSIXct("2006-01-01", origin="1960-01-01"))[1]
+tstamp_67 <-unclass(as.POSIXct("2007-01-01", origin="1960-01-01"))[1]
+tstamp_78 <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01"))[1]
+tstamp_89 <-unclass(as.POSIXct("2009-01-01", origin="1960-01-01"))[1]
+tstamp_90 <-unclass(as.POSIXct("2010-01-01", origin="1960-01-01"))[1]
+
+
+# Partition the completed tickets (t2) into one data frame per calendar year.
+t_4 <- t2[which( t2$start < tstamp_45 ),]
+t_5 <- t2[which( t2$start >= tstamp_45 & t2$start < tstamp_56 ),]
+t_6 <- t2[which( t2$start >= tstamp_56 & t2$start < tstamp_67 ),]
+t_7 <- t2[which( t2$start >= tstamp_67 & t2$start < tstamp_78 ),]
+t_8 <- t2[which( t2$start >= tstamp_78 & t2$start < tstamp_89 ),]
+t_9 <- t2[which( t2$start >= tstamp_89 & t2$start < tstamp_90 ),]
+t_10 <- t2[which( t2$start >= tstamp_90 ),]
+
+# NOTE(review): mfrow=c(4,1) gives a 4-panel page, but six plot_rt_hist()
+# calls follow -- the 5th and 6th spill onto a new page (or clobber an
+# interactive device).  Probably intended to be c(6,1); TODO confirm.
+par(mfrow=c(4,1))
+plot_rt_hist(t_4)
+plot_rt_hist(t_5)
+plot_rt_hist(t_6)
+plot_rt_hist(t_7)
+plot_rt_hist(t_8)
+plot_rt_hist(t_9)
+par(mfrow=c(1,1))
+
+start_image("rt_support_seasonal.png")
+par(mfrow=c(6,1))
+par(mai=c(.3,.3,.3,.3))
+
+# start dates on Sunday to align all weeks with weekend boundaries. 
+year_hist(t_4, "2004", "2003/12/28", "2005/1/7", 85) +year_hist(t_5, "2005", "2005/1/2", "2006/1/7", 85) +year_hist(t_6, "2006", "2006/1/1", "2007/1/7", 85) +year_hist(t_7, "2007", "2006/12/31", "2008/1/7", 85) +year_hist(t_8, "2008", "2007/12/30", "2009/1/7", 85) +year_hist(t_9, "2009", "2008/12/28", "2010/1/30", 85) +end_image() + +par(mai=c(0.7,0.7,0.7,0.7)) +par(mfrow=c(1,1)) + + +tstamp <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01")) +t_67 <- t2[which( t2$start < tstamp[1] ),] +t_89 <- t2[which( t2$start >= tstamp[1] ),] + + +# install.packages('sn') +require(sn) +par(mfrow=c(6,1)) +par(mai=c(0.3,0.3,0.3,0.3)) + +#start_image("rt_hist_ttc_1000.png") +time_hist <- function (t, lessthan, year, log=T, breaks=30, xlim=c(-4,10), ylim=c(0,150)) +{ + d <- (t$lastreply - t$start)/(60*60) + main = sprintf("Histogram of d<%s for %s", lessthan, year); + if ( log ) + { + d <- log(d[which(d= tstamp),] + +#end_image() +h4<-time_hist(t_4, 10000, "2004") +h5<-time_hist(t_5, 10000, "2005") +h6<-time_hist(t_6, 10000, "2006") +#h7<-time_hist(t_7, 10000, "2007") +h7a<-time_hist(t_7a, 10000, "2007") +h7b<-time_hist(t_7b, 10000, "2007") +h8<-time_hist(t_8, 10000, "2008") +h9<-time_hist(t_9, 10000, "2009") + +tstamp <-unclass(as.POSIXct("2009-09-01", origin="1960-01-01")) +m_9a <- m_9[which(m_9$start < tstamp),] +m_9b <- m_9[which(m_9$start >= tstamp),] + +split_by_time <- function (t, datestr) +{ + tstamp <-unclass(as.POSIXct(datestr, origin="1960-01-01")) + a <- t[which(t$start < tstamp),] + b <- t[which(t$start >= tstamp),] + v<- list('before'=a, 'after'=b) + return (v); +} + +mh7 <- time_hist(m_7, 10000, '2007') + +sm_8 <- split_by_time(m_8, "2008-07-01") +#mh8a <- time_hist(rbind(m_7, m_8$before, m_8$after), 10000, '2008') +#mh8a <- time_hist(rbind(m_7[which(log((m_7$lastreply-m_7$start)/(60*60))>2),]), 10000, '2008') +# m_7 is junk data + +mh_8 <- time_hist(sm_8$before, 10000, '2008') + +sm_9 <- split_by_time(m_9, "2009-09-01") + +mh_89 <- time_hist(rbind(sm_8$after, 
sm_9$before), 10000, '2009') +mh_9 <- time_hist(sm_9$after, 10000, '2009') + + +x<-seq(-8,10,0.01) +#x<- exp(x)/24 + +#my7<-dsn(x, dp=cp.to.dp(mh7$cp)) +my8<-dsn(x, dp=cp.to.dp(mh_8$cp)) +my89<-dsn(x, dp=cp.to.dp(mh_89$cp)) +my9<-dsn(x, dp=cp.to.dp(mh_9$cp)) + +y4<-dsn(x, dp=cp.to.dp(h4$cp)) +y5<-dsn(x, dp=cp.to.dp(h5$cp)) +y6<-dsn(x, dp=cp.to.dp(h6$cp)) +y7a<-dsn(x, dp=cp.to.dp(h7a$cp)) +y7b<-dsn(x, dp=cp.to.dp(h7b$cp)) +y8<-dsn(x, dp=cp.to.dp(h8$cp)) +y9<-dsn(x, dp=cp.to.dp(h9$cp)) + +start_image("rt_time_to_resolve.png") +par(mfrow=c(1,1)) +par(mai=c(1.0,0.7,0.7,0.7)) +# monitor +plot(x, my9, col='blue', type='l', axes=F, xlab="Days to Resolve", ylab="Density") +axis(1, labels=c(0.0001, 0.01, 0.1, 1, 5, 20, 100), at=c(0.0001, 0.01, 0.1, 1, 5, 20, 100)) +axis(2) +lines(x, my8, col='dodgerblue') +lines(x, my7, col='turquoise') +abline(v=x[which(my8==max(my8))]) +abline(v=x[which(my9==max(my9))]) + +# heavy +lines(x, y7a, col='green3') +lines(x, y4, col='green4') +lines(x, y5, col='greenyellow') + +abline(v=x[which(y4==max(y4))]) +abline(v=x[which(y5==max(y5))]) +abline(v=x[which(y7a==max(y7a))]) + +# light +lines(x, y7b, col='orange', type='l') +lines(x, y6, col='orange3') +lines(x, y8, col='firebrick2') +lines(x, y9, col='firebrick4') + +abline(v=x[which(y7b==max(y7b))]) +abline(v=x[which(y6==max(y6))]) +abline(v=x[which(y8==max(y8))]) +abline(v=x[which(y9==max(y9))]) + +end_image() + +whisker <- function (x0,y0,sd, length=0.05) +{ + arrows(x0, y0, x0, y0+sd, code=2, angle=90, length=length) + arrows(x0, y0, x0, y0-sd, code=2, angle=90, length=length) +} + +whisker2 <- function (x0,y0, y0_high, y0_low, col="black", length=0.05) +{ + arrows(x0, y0, x0, y0_high, code=2, angle=90, length=length, col=col) + arrows(x0, y0, x0, y0_low, code=2, angle=90, length=length, col=col) +} + +start_image("rt_aggregate_times.png") +par(mfrow=c(1,1)) +par(mai=c(1,1,1,1)) +par(mar=c(5,4,4,4)) + +s_list <- c(1519, 1596, 1112, 1591, 1019, 815) +m_list <- c(0,0,0, 119, 229, 251) 
+x_tick_list <- c(1, 2.5, 4, 5.5, 7, 8.5) +x_tt_resolve_list <- c(1, 2.5, 4, 5.2,5.8, 7, 8.5) +y_tt_resolve_list <- c( x[which(y4==max(y4))], + x[which(y5==max(y5))], + x[which(y6==max(y6))], + x[which(y7a==max(y7a))], + x[which(y7b==max(y7b))], + x[which(y8==max(y8))], + x[which(y9==max(y9))]) + + +y_mean_list <- c( h4$cp['mean'], + h5$cp['mean'], + h6$cp['mean'], + h7a$cp['mean'], + h7b$cp['mean'], + h8$cp['mean'], + h9$cp['mean']) + +y_sd_list <- c( h4$cp['s.d.'], + h5$cp['s.d.'], + h6$cp['s.d.'], + h7a$cp['s.d.'], + h7b$cp['s.d.'], + h8$cp['s.d.'], + h9$cp['s.d.']) + +days_tt_resolve <- exp(y_tt_resolve_list)/24 +days_tt_resolve_low <- exp(y_tt_resolve_list-y_sd_list)/24 +days_tt_resolve_high <- exp(y_tt_resolve_list+y_sd_list)/24 + + +my_mean_list <- c( mh_8$cp['mean'], + mh_89$cp['mean'], + mh_9$cp['mean']) + +my_sd_list <- c( mh_8$cp['s.d.'], + mh_89$cp['s.d.'], + mh_9$cp['s.d.']) + +mx_tt_resolve_list <- c(7, 8, 8.5) +my_tt_resolve_list <- c(x[which(my8==max(my8))], + x[which(my89==max(my89))], + x[which(my9==max(my9))] ) + +mdays_tt_resolve <- exp(my_tt_resolve_list)/24 +mdays_tt_resolve_low <- exp(my_tt_resolve_list-my_sd_list)/24 +mdays_tt_resolve_high <- exp(my_tt_resolve_list+my_sd_list)/24 + + +days_y_sd_list <- exp(y_sd_list)/24 +mdays_y_sd_list <- exp(my_sd_list)/24 + +days_y_sd_list <- exp(y_sd_list)/24 +mdays_tt_resolve <- exp(my_tt_resolve_list)/24 + +plot(x_tt_resolve_list, days_tt_resolve, type='p', pch=c(22), axes=FALSE, + log='y', ylim=c(.01,350), xlab="Year", ylab='') +#points(x_tt_resolve_list, days_tt_resolve, pch=c(22)) + +lines(c(x_tt_resolve_list[1:2], x_tt_resolve_list[4]), c(days_tt_resolve[1:2], days_tt_resolve[4]), col='red') +lines(c(x_tt_resolve_list[3], x_tt_resolve_list[5:7]), c(days_tt_resolve[3], days_tt_resolve[5:7]), col='green') +#lines(mx_tt_resolve_list, mdays_tt_resolve) +#points(mx_tt_resolve_list, mdays_tt_resolve, pch=c(24)) + +lines(mx_tt_resolve_list, mdays_tt_resolve, col='blue') +points(mx_tt_resolve_list, 
mdays_tt_resolve, pch=c(24)) + +ticks<-c(0,0.01, 0.1, 0.5,1,2,4,7,21, 28, 7*8, 7*16) + +axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_tick_list) +axis(2, labels=ticks, at=ticks) +mtext("Days to Resolve Message", 2, line=3) +#axis(2, labels=ticks, at=ticks) +#for (i in 1:length(days_y_sd_list) ) { +# whisker(x_tt_resolve_list[i], days_tt_resolve[i], days_y_sd_list[i]) +#} +#for (i in 1:length(mdays_y_sd_list) ) { +# whisker(mx_tt_resolve_list[i], mdays_tt_resolve[i], mdays_y_sd_list[i]) +#} +for (i in c(1,2,4) ) { + whisker2(x_tt_resolve_list[i], days_tt_resolve[i], + days_tt_resolve_high[i], days_tt_resolve_low[i], col='red') +} +for (i in c(3,5,6,7) ) { + whisker2(x_tt_resolve_list[i], days_tt_resolve[i], + days_tt_resolve_high[i], days_tt_resolve_low[i], col='green') +} +for (i in 1:length(mdays_y_sd_list) ) { + whisker2(mx_tt_resolve_list[i], mdays_tt_resolve[i], + mdays_tt_resolve_high[i], mdays_tt_resolve_low[i], col='blue') +} + +abline(h=21,col='grey90') +abline(h=2,col='grey90') +abline(h=0.5,col='grey80') + +legend(1, .05, + cex=0.7, + legend=c("Unstable Periods", "Stable Periods", "MyOps Messages"), + pch=c(22, 22, 24), + col=c('red', 'green', 'blue'), + lty=c(1, 1,1), merge=T) +end_image() +# install.packages('UsingR') +require(UsingR) + +m<-min(t_4$start) +d<-data.frame( + '2004'=t_4$start-m, + '2005'=t_5$start-m, + '2006'=t_6$start-m) +simple.violinplot(d) + +par(mfrow=c(3,3)) +par(mai=c(.3,.3,.3,.3)) +sp <- function (t) +{ + d <- (t$lastreply-t$start)/(60*60*24) + simple.violinplot(log(d)) +} +sp(t_4) +sp(t_5) +sp(t_6) +sp(t_7) +sp(t_8) +sp(t_9) +sp(m_8) +sp(m_89) +sp(m_9) + + +t3 <- add_year (t2) +m3 <- add_year (m2) + +par(mfrow=c(1,2)) +par(mai=c(.5,.5,.5,.5)) +t4<-t3[which((t3$lastreply-t3$start)/(60*60*24) < 20),] +t4<-t3 +simple.violinplot(log((lastreply-start)/(60*60*24)) ~ year, data=t4) + +m3[which((m3$lastreply-m3$start)< 0),] +m4<-m3[which((m3$lastreply-m3$start)/(60*60*24) < 100),] 
+simple.violinplot(log((lastreply-start)/(60*60*24)) ~ year, data=m4, log='y')
+
+# meanof: fit a skew-normal (sn::sn.em) to log(days-to-resolve) for the
+# tickets of one year; the fitted centred parameters live in $cp
+# ('mean', 's.d.', ...).
+meanof <- function (t, year)
+{
+	tx <- t[which(t$year == year),]
+	r<-sn.em(y=log((tx$lastreply-tx$start)/(60*60*24)))
+	return (r)
+}
+
+# BUG FIX: this previously read `t_sd <- p`, but no object `p` exists in this
+# script, so the whisker loop below died with "object 'p' not found".  By
+# analogy with y_sd_list (h4$cp['s.d.'] etc.) earlier in this script, t_sd is
+# the per-year fitted standard deviation matching the t_p means.  Fit once per
+# year and pull both parameters from the same fit (the original called
+# meanof() once per year for the means alone).
+year_fits <- lapply(2004:2010, function (y) meanof(t3, y))
+t_sd <- vapply(year_fits, function (r) r$cp[['s.d.']], numeric(1))
+t_p <- vapply(year_fits, function (r) r$cp[['mean']], numeric(1))
+points(t_p)
+for (i in seq_along(t_sd) ) {
+	whisker(i, t_p[i], exp(t_sd[i]))
+}
+
+
+
+
+
+
+#for (i in 1:length(y_tt_resolve_list) ) {
+#	whisker(x_tt_resolve_list[i], scale_by*y_tt_resolve_list[i], scale_by*2)
+#}
+#for (i in 1:length(my_tt_resolve_list) ) {
+#	whisker(mx_tt_resolve_list[i], scale_by*my_tt_resolve_list[i], scale_by*2)
+#}
+
+#
+#end_image()
+#par(mfrow=c(2,1))
+#plot_rt_hist(t_67)
+#plot_rt_hist(t_89)
+par(mfrow=c(1,1))
+
diff --git a/statistics/rt_monitor_data.r b/statistics/rt_monitor_data.r
new file mode 100644
index 0000000..62b63ff
--- /dev/null
+++ b/statistics/rt_monitor_data.r
@@ -0,0 +1,132 @@
+
+
+source("functions.r");
+
+# system("parse_rt_data.py 22 > rt_monitor_data.csv");
+m <- read.csv('rt_monitor_data.csv', sep=',', header=TRUE)
+
+par(mfrow=c(2,1))
+
+# Exploratory iterated-log histograms of reply counts (same pattern as
+# rt_data.r; non-finite values from log(log(.)) are dropped by hist()).
+h<-hist(log(log(m$replies)), breaks=50)
+lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
+h<-hist(log(log(log(m$replies))), breaks=50)
+lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
+
+
+par(mfrow=c(1,1))
+
+# Keep only resolved tickets; d = hours from creation to last reply.
+m2 <- m[which(m$complete == 1),]
+d <- (m2$lastreply - m2$start)/(60*60)
+
+#start_image("rt_hist_ttc_1000.png")
+#hist(d[which(d<1000)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_200.png")
+#hist(d[which(d<200)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_50.png")
+#hist(d[which(d<50)], xlab="hours from creation to last reply", breaks=30)
+#end_image()
+#
+#start_image("rt_hist_ttc_10.png")
+#hist(d[which(d<10)], xlab="hours from creation to last reply", breaks=30) +#end_image() +# +#d2 <- (t2$lastreply - t2$start) +#h<-hist(log(d2), plot=F, breaks=50) +#lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)]) + + +# this doesn't work as I would like. I think the bins aren't as I expect +#h <- hist(d, plot=F, breaks=c(seq(0,max(d)+1, .1))) +#plot(h$counts, log="x", pch=20, col="blue", +# main="Log-normal distribution", +# xlab="Value", ylab="Frequency") + +#plot(log(d2)) +#plot(ecdf(d2)) + +d2<-(m2$lastreply-m2$start) +start_image("rt_monitor_ttc.png") +par(mfrow=c(2,1)) +qqnorm(log(d2)) +plot_rt_hist(m2) +end_image() + +par(mfrow=c(1,1)) +start_image("rt_monitor_trends.png") +hist(log(d2[which(d2>59026)]), breaks=60, xlab="LOG(time to last-reply)", main="Monitor Queue Traffic patterns") +end_image() + +tstamp_78 <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01"))[1] +tstamp_89 <-unclass(as.POSIXct("2009-01-01", origin="1960-01-01"))[1] + +m_7 <- m2[which( m2$start < tstamp_78 ),] +m_8 <- m2[which( m2$start >= tstamp_78 & m2$start < tstamp_89 ),] +m_9 <- m2[which( m2$start >= tstamp_89 ),] + + +par(mfrow=c(3,1)) +plot_rt_hist(m_7) +plot_rt_hist(m_8) +plot_rt_hist(m_9) +par(mfrow=c(1,1)) + + +tstamp <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01")) +m_67 <- m2[which( m2$start < tstamp[1] ),] +m_89 <- m2[which( m2$start >= tstamp[1] ),] + + +#par(mfrow=c(2,1)) +#plot_rt_hist(t_67) +#plot_rt_hist(t_89) +par(mfrow=c(1,1)) +par(mai=c(1,1,1,2)) +par(mar=c(5,4,4,8)) + +s_list <- c('2006'=1112, '2007'=1591, '2008'=1019, '2009'=815) +m_list <- c('2006'=0, '2007'=119, '2008'=229, '2009'=251) + +start_image('rt_aggregate_traffic.png') +par(mfrow=c(1,1)) +par(mai=c(1,1,1,1)) +par(mar=c(5,4,4,4)) + +s_list <- c(1519, 1596, 1112, 1591, 1019, 815) +m_list <- c(0,0,0, 119, 229, 251) +x_online_node_list <- c(1, 2.5, 4, 5.5, 7, 8.5) +y_online_node_list <- c(330, 480, 500, 550, 575, 642) + +y<- rbind(support=s_list, monitor=m_list) +barplot(y, 
space=0.5, width=1, ylim=c(0,2000), xlim=c(0,9), + col=c('grey35', 'grey85'), + legend=F, ylab="Messages with One or More Replies", xlab="Year") +scale_by <- 1500 / 700 +lines(x_online_node_list, y_online_node_list*scale_by) +points(x_online_node_list, y_online_node_list*scale_by, pch=c(22)) +ticks<-c(0, 100, 200, 300, 400, 500, 600, 700) + +axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_online_node_list) +axis(4, labels=ticks, at=ticks*scale_by) + +mtext("Online Node Count", 4, line=3) +legend(6.5, 2000, + cex=0.7, + legend=c("Online Node Count", "MyOps Messages", "Support Messages"), + fill=c(0, 'grey85', 'grey40'), + lty=c(1,0,0), merge=T) +end_image() + + +start_image("rt_monitor_seasonal.png") +par(mfrow=c(3,1)) +par(mai=c(.3,.3,.3,.3)) +year_hist(m_7, "2007", "2006/12/31", "2008/1/7", 60) +year_hist(m_8, "2008", "2007/12/30", "2009/1/7", 60) +year_hist(m_9, "2009", "2008/12/28", "2010/1/30", 60) +end_image() + +par(mfrow=c(1,1)) diff --git a/statistics/site_scores.r b/statistics/site_scores.r new file mode 100644 index 0000000..615d303 --- /dev/null +++ b/statistics/site_scores.r @@ -0,0 +1,83 @@ + +source("functions.r"); + +#system("../nodequery.py --nodelist > ../nodelist.txt") +#system("../comonquery.py --cache --nodelist ../nodelist.txt --select 'resptime>0' --fields='name,cpuspeed,numcores,memsize,disksize,bwlimit' | grep -v null | ./hn2lb.py | ./hn2pcustatus.py > ./out_resources.csv ") + +mdrc <- read.csv("out_resources.csv", TRUE, sep=",") + +# replace all weird numbers with defaults of 100mbps +mdrc$bwlimit <- replace(mdrc$bwlimit, which(mdrc$bwlimit==0 | mdrc$bwlimit==1), 100000) + +f<-slices_2 + +s2<- f(mdrc, FALSE); +mdrc$score <- s2; +b<-30; + +# ---------------------- +### LOGINBASE +unique_loginbase_length <- length(unique(mdrc$loginbase)); +unique_lb <- list(loginbase=array(0,c(unique_loginbase_length)), + score=array(0,c(unique_loginbase_length)), + memsize=array(0,c(unique_loginbase_length)), + 
disksize=array(0,c(unique_loginbase_length)), + cpuspeed=array(0,c(unique_loginbase_length)) + ) + +for ( i in 1:length(mdrc$loginbase) ) +{ + r <- mdrc[i,]; + unique_lb$loginbase[r$loginbase] <- r$loginbase; + unique_lb$score[r$loginbase] <- unique_lb$score[r$loginbase] + r$score; + + v <- f(r, TRUE); + unique_lb$memsize[r$loginbase] <- unique_lb$memsize[r$loginbase] + v[1]; + unique_lb$disksize[r$loginbase] <- unique_lb$disksize[r$loginbase] + v[2]; + unique_lb$cpuspeed[r$loginbase] <- unique_lb$cpuspeed[r$loginbase] + v[3]; +} + +df<- data.frame(unique_lb) + +h<- hist(df$score, breaks=b); +bins<-length(h$breaks); +c<- array(0,c(bins)); +d<- array(0,c(bins)); +m<- array(0,c(bins)); +b<- array(0,c(bins)); +# foreach score value, find which range it falls into, +# then in three columns for cpu, mem, disk, record the fraction of each. +# then plot each sequence in a stacked graph, perhaps beside h$counts +for ( i in 1:length(df$cpuspeed) ) +{ + r <- df[i,]; + s <- index_of_bin(h, r$score); # find bin position... 
+ # take fraction that each component contributes to the total, and add to sum + + m[s] <- m[s] + unique_lb$memsize[r$loginbase]/r$score; + d[s] <- d[s] + unique_lb$disksize[r$loginbase]/r$score; + c[s] <- c[s] + unique_lb$cpuspeed[r$loginbase]/r$score; +} + +a <- array(c(c,d,m), dim=c(bins, 3)); + +png("/Users/soltesz/Downloads/slice_policy_3.png") +par(mfrow=c(2,1)) +par(mai=c(0.5,1,0.5,0.2)) +barplot(c(0,h$counts), + xlab="slice count", + main="Distribution of Site Scores", + ylab="Total Frequency", + ylim=c(0,70)) +par(mai=c(1.0,1,0,0.2)); +barplot(t(a), + legend=c("CPUspeed (GHz)", "DISKsize (GB)", "MEMsize (GB)"), + col=c("pink", "lightblue", "lightgreen"), + ylim=c(0,70), + ylab="Break-down by Resource", + xlab="Site Score", + names.arg=c(0,h$breaks[1:length(h$breaks)-1]), +); +dev.off() + + diff --git a/statistics/site_scores_bw.r b/statistics/site_scores_bw.r new file mode 100644 index 0000000..1697671 --- /dev/null +++ b/statistics/site_scores_bw.r @@ -0,0 +1,87 @@ + +source("functions.r"); + +#system("../nodequery.py --nodelist > ../nodelist.txt") +#system("../comonquery.py --cache --nodelist ../nodelist.txt --select 'resptime>0' --fields='name,cpuspeed,numcores,memsize,disksize,bwlimit' | grep -v null | ./hn2lb.py | ./hn2pcustatus.py > ./out_resources.csv ") + +mdrc <- read.csv("out_resources.csv", TRUE, sep=",") + +# replace all weird numbers with defaults of 100mbps +mdrc$bwlimit <- replace(mdrc$bwlimit, which(mdrc$bwlimit==0 | mdrc$bwlimit==1), 100000) + +f<-slices_3 + +s2<- f(mdrc, FALSE); +mdrc$score <- s2; +b<-30; + +# ---------------------- +### LOGINBASE +unique_loginbase_length <- length(unique(mdrc$loginbase)); +unique_lb <- list(loginbase=array(0,c(unique_loginbase_length)), + score=array(0,c(unique_loginbase_length)), + memsize=array(0,c(unique_loginbase_length)), + disksize=array(0,c(unique_loginbase_length)), + cpuspeed=array(0,c(unique_loginbase_length)), + bwlimit=array(0,c(unique_loginbase_length)) + ) + +for ( i in 
1:length(mdrc$loginbase) ) +{ + r <- mdrc[i,]; + unique_lb$loginbase[r$loginbase] <- r$loginbase; + unique_lb$score[r$loginbase] <- unique_lb$score[r$loginbase] + r$score; + + v <- f(r, TRUE); + unique_lb$memsize[r$loginbase] <- unique_lb$memsize[r$loginbase] + v[1]; + unique_lb$disksize[r$loginbase] <- unique_lb$disksize[r$loginbase] + v[2]; + unique_lb$cpuspeed[r$loginbase] <- unique_lb$cpuspeed[r$loginbase] + v[3]; + unique_lb$bwlimit[r$loginbase] <- unique_lb$bwlimit[r$loginbase] + v[4]; +} + +df<- data.frame(unique_lb) + +h<- hist(df$score, breaks=b); +bins<-length(h$breaks); +c<- array(0,c(bins)); +d<- array(0,c(bins)); +m<- array(0,c(bins)); +b<- array(0,c(bins)); +# foreach score value, find which range it falls into, +# then in three columns for cpu, mem, disk, record the fraction of each. +# then plot each sequence in a stacked graph, perhaps beside h$counts +for ( i in 1:length(df$cpuspeed) ) +{ + r <- df[i,]; + s <- index_of_bin(h, r$score); # find bin position... + # take fraction that each component contributes to the total, and add to sum + + m[s] <- m[s] + unique_lb$memsize[r$loginbase]/r$score; + d[s] <- d[s] + unique_lb$disksize[r$loginbase]/r$score; + c[s] <- c[s] + unique_lb$cpuspeed[r$loginbase]/r$score; + b[s] <- b[s] + unique_lb$bwlimit[r$loginbase]/r$score; +} + +#vals <- list(bwlimit=b,cpuspeed=c,disksize=d,memsize=m) +a <- array(c(b,c,d,m), dim=c(bins, 4)); + +png("/Users/soltesz/Downloads/slice_policy_4.png") +par(mfrow=c(2,1)) +par(mai=c(0.5,1,0.5,0.2)) +barplot(c(0,h$counts), + xlab="slice count", + main="Distribution of Site Scores", + ylab="Total Frequency", + ylim=c(0,70)) +par(mai=c(1.0,1,0,0.2)); +barplot(t(a), + legend=c("BWlimit (Mbps)", "CPUspeed (GHz)", "DISKsize (GB)", "MEMsize (GB)"), + col=c("lightyellow", "pink", "lightblue", "lightgreen"), + ylim=c(0,70), + ylab="Break-down by Resource", + xlab="Site Score", + names.arg=c(0,h$breaks[1:length(h$breaks)-1]), +); +dev.off() + + diff --git a/statistics/site_scores_pcu.r 
b/statistics/site_scores_pcu.r new file mode 100644 index 0000000..c4a3f92 --- /dev/null +++ b/statistics/site_scores_pcu.r @@ -0,0 +1,94 @@ + +source("functions.r"); + +#system("../nodequery.py --nodelist > ../nodelist.txt") +#system("../comonquery.py --cache --nodelist ../nodelist.txt --select 'resptime>0' --fields='name,cpuspeed,numcores,memsize,disksize,bwlimit' | grep -v null | ./hn2lb.py | ./hn2pcustatus.py | sed -e "s/none/0/g" -e "s/Not_Run/0.5/g" -e "s/error/0.5/g" -e "s/Ok/1/g" > ./out_resources.csv ") + +mdrc <- read.csv("out_resources.csv", TRUE, sep=",") + +# replace all weird numbers with defaults of 100mbps +mdrc$bwlimit <- replace(mdrc$bwlimit, which(mdrc$bwlimit==0 | mdrc$bwlimit==1), 100000) +#mdrc$pcus <- replace(mdrc$pcustatus, which(mdrc$pcustatus=="none"), 0); +#mdrc$pcus <- replace(mdrc$pcus, which(mdrc$pcus=="error" | mdrc$pcusu=="Not_Run"), 0.5); +#mdrc$pcus <- replace(mdrc$pcus, which(mdrc$pcus=="Ok"), 1); + +f<-slices_4 + +s2<- f(mdrc, FALSE); +mdrc$score <- s2; +b<-30; + +# ---------------------- +### LOGINBASE +unique_loginbase_length <- length(unique(mdrc$loginbase)); +unique_lb <- list(loginbase=array(0,c(unique_loginbase_length)), + score=array(0,c(unique_loginbase_length)), + memsize=array(0,c(unique_loginbase_length)), + disksize=array(0,c(unique_loginbase_length)), + cpuspeed=array(0,c(unique_loginbase_length)), + bwlimit=array(0,c(unique_loginbase_length)), + pcustatus=array(0,c(unique_loginbase_length)) + ) + +for ( i in 1:length(mdrc$loginbase) ) +{ + r <- mdrc[i,]; + unique_lb$loginbase[r$loginbase] <- r$loginbase; + unique_lb$score[r$loginbase] <- unique_lb$score[r$loginbase] + r$score; + + v <- f(r, TRUE); + unique_lb$memsize[r$loginbase] <- unique_lb$memsize[r$loginbase] + v[1]; + unique_lb$disksize[r$loginbase] <- unique_lb$disksize[r$loginbase] + v[2]; + unique_lb$cpuspeed[r$loginbase] <- unique_lb$cpuspeed[r$loginbase] + v[3]; + unique_lb$bwlimit[r$loginbase] <- unique_lb$bwlimit[r$loginbase] + v[4]; + 
unique_lb$pcustatus[r$loginbase] <- unique_lb$pcustatus[r$loginbase] + v[5]; +} + +df<- data.frame(unique_lb) + +h<- hist(df$score, breaks=b); +bins<-length(h$breaks); +c<- array(0,c(bins)); +d<- array(0,c(bins)); +m<- array(0,c(bins)); +b<- array(0,c(bins)); +p<- array(0,c(bins)); +# foreach score value, find which range it falls into, +# then in three columns for cpu, mem, disk, record the fraction of each. +# then plot each sequence in a stacked graph, perhaps beside h$counts +for ( i in 1:length(df$cpuspeed) ) +{ + r <- df[i,]; + s <- index_of_bin(h, r$score); # find bin position... + # take fraction that each component contributes to the total, and add to sum + + m[s] <- m[s] + unique_lb$memsize[r$loginbase]/r$score; + d[s] <- d[s] + unique_lb$disksize[r$loginbase]/r$score; + c[s] <- c[s] + unique_lb$cpuspeed[r$loginbase]/r$score; + b[s] <- b[s] + unique_lb$bwlimit[r$loginbase]/r$score; + p[s] <- p[s] + unique_lb$pcustatus[r$loginbase]/r$score; +} + +#vals <- list(bwlimit=b,cpuspeed=c,disksize=d,memsize=m) +a <- array(c(p,b,c,d,m), dim=c(bins, 5)); + +#png("/Users/soltesz/Downloads/slice_policy_5.png") +par(mfrow=c(2,1)) +par(mai=c(0.5,1,0.5,0.2)) +barplot(c(0,h$counts), + xlab="slice count", + main="Distribution of Site Scores", + ylab="Total Frequency", + ylim=c(0,70)) +par(mai=c(1.0,1,0,0.2)); +barplot(t(a), + legend=c("PCU Status", "BWlimit (Mbps)", "CPUspeed (GHz)", "DISKsize (GB)", "MEMsize (GB)"), + col=c("orange", "lightyellow", "pink", "lightblue", "lightgreen"), + ylim=c(0,70), + ylab="Break-down by Resource", + xlab="Site Score", + names.arg=c(0,h$breaks[1:length(h$breaks)-1]), +); +#dev.off() + +