clearer names for actions, and infer actions better
[monitor.git] / statistics / rpm_dist.r
1 #####
2
3 #system("URL='https://monitor.planet-lab.org:443/monitor/query?object=nodes&nodehistory_hostname=&hostname=on&observed_status=on&rpms=on&rpmvalue=planetlab&tg_format=plain'; curl -s --insecure $URL | grep -v DOWN | grep -v DEBUG | /usr/share/monitor/statistics/hn2rpms.py > out_rpm.csv");
4 #system("grep MD5SUMS /usr/share/monitor/monitor.log | grep -v measurement-lab | awk 'BEGIN { printf \"hostname,yumsum\\n\" } {if ( $3 != \"\") { printf \"%s,%s\\n\", $2,$3 } }' > yumsum.csv")
5
# Load the per-node RPM inventory and the per-node yum checksum, then join
# them on hostname.
#
# stringsAsFactors = TRUE is spelled out because the rest of this script
# relies on factor columns (levels(r$NodeManager), per-level counts from
# summary()); since R 4.0.0 read.csv() returns character columns by default.
r <- read.csv("out_rpm.csv", stringsAsFactors = TRUE)
ys <- read.csv("yumsum.csv", stringsAsFactors = TRUE)
m <- merge(r, ys, by = "hostname")

# Three-way contingency table of NodeManager x kernel x iptables versions.
# factor() is re-applied so that unused levels are dropped before tabulating.
s <- table(factor(r$NodeManager), factor(r$kernel), factor(r$iptables))
plot(s)
12
# Reference ("ideal") package set: one expected RPM version string per
# package, keyed by the package's column name.
ideal <- c(
  NodeManager = "NodeManager-1.8-12.planetlab.1",
  NodeUpdate = "NodeUpdate-0.5-4.planetlab",
  codemux = "codemux-0.1-13.planetlab",
  fprobe.ulog = "fprobe-ulog-1.1.3-0.planetlab",
  ipod = "ipod-2.2-1.planetlab",
  iproute = "iproute-2.6.16-2.planetlab",
  iptables = "iptables-1.3.8-9.planetlab",
  kernel = "kernel-2.6.22.19-vs2.3.0.34.39.planetlab",
  madwifi = "madwifi-0.9.4-2.6.22.19.3.planetlab",
  monitor.client = "monitor-client-3.0-17.planetlab",
  monitor.runlevelagent = "monitor-runlevelagent-3.0-17.planetlab",
  pl_mom = "pl_mom-2.3-1.planetlab",
  pl_sshd = "pl_sshd-1.0-11.planetlab",
  pyplnet = "pyplnet-4.3-3.planetlab",
  util.vserver.pl = "util-vserver-pl-0.3-17.planetlab",
  vserver.planetlab.f8.i386 = "vserver-planetlab-f8-i386-4.2-12.2009.06.23",
  vserver.systemslices.planetlab.f8.i386 =
    "vserver-systemslices-planetlab-f8-i386-4.2-12.2009.06.23",
  vsys = "vsys-0.9-3.planetlab",
  vsys.scripts = "vsys-scripts-0.95-11.planetlab"
)
32
# Summarise columns 4-23 of `r`. For factor columns summary() returns a named
# count per distinct value.
r_summary <- lapply(r[, 4:23], summary)

# Most frequent entry of every summarised column. The result is printed
# explicitly: a bare expression inside a loop body is never auto-printed, so
# the previous for-loop computed these names and silently discarded them.
most_common <- vapply(
  r_summary,
  function(counts) {
    ranked <- names(sort(counts, decreasing = TRUE))
    if (length(ranked) > 0) ranked[[1]] else NA_character_
  },
  character(1)
)
print(most_common)

# Position of the ideal NodeManager version among the levels observed in `r`
# (NA when that version is absent or the column is not a factor).
print(match(ideal[["NodeManager"]], levels(r$NodeManager)))
41
# Encode the first `rows` rows of a data frame as "signature" strings.
#
# Every column of `row` is converted to integer codes: the position of each
# value among the sorted distinct values of that same column. The codes of one
# row are then joined with "-", e.g. "1-3-2". Two rows get the same signature
# exactly when they agree in every column.
#
# Arguments:
#   row        data frame to encode (all of its columns are used).
#   rows       number of leading rows to encode. The default of 566 is kept
#              for compatibility; callers should pass nrow(row).
#   start_col, end_col, ref
#              accepted for compatibility but not used.
#
# Returns a one-column character matrix with one signature per row (row names
# are all "str", matching the historical rbind()-built result), or NULL when
# `rows` is less than 1.
#
# NOTE: codes are relative to the levels found in `row` itself, so signatures
# from two different data frames are not comparable with each other.
cv <- function(row, rows = 566, start_col = 4, end_col = 23, ref = NULL) {
  # seq_len() instead of 1:rows, which would iterate c(1, 0) for rows == 0.
  if (rows < 1) {
    return(NULL)
  }
  idx <- seq_len(rows)

  # One integer-code vector per column; the levels are derived once per column
  # rather than once per cell. Indices past the last row yield NA.
  codes <- lapply(names(row), function(name) {
    as.integer(factor(unlist(row[name], use.names = FALSE)))[idx]
  })

  signatures <- if (length(codes) > 0) {
    do.call(paste, c(codes, sep = "-"))
  } else {
    # No columns to encode: every signature is the empty string.
    rep("", rows)
  }

  matrix(signatures, ncol = 1L, dimnames = list(rep("str", rows), NULL))
}
60
# Select the rows of `d` whose `column` entry equals `val`.
#
# Arguments:
#   d       data frame to filter.
#   column  name (or index) of the column to compare.
#   val     value to compare against.
#
# Returns the matching rows of `d`; rows where the comparison is NA are
# dropped because which() ignores NA.
grow <- function(d, column, val) {
  hits <- d[column] == val
  d[which(hits), ]
}
66
# Signatures for the merged table (every column of `m`, yumsum included).
cv(m, nrow(m))

# The ideal package set as a one-row data frame, encoded the same way.
i <- data.frame(t(ideal))
cv(i, 1, 1, length(ideal))

# ---

# One signature per node; identical signatures mean identical package sets.
x <- cv(r, nrow(r))
x2 <- factor(x)

# Draw the frequency of each distinct package combination, most common first.
#   signatures  factor of per-node signature strings.
#   node_count  number of nodes, shown in the plot title.
plot_package_distribution <- function(signatures, node_count) {
  counts <- sort(table(signatures), decreasing = TRUE)
  invisible(barplot(
    counts,
    ylim = c(0, max(counts)),
    xlab = "Unique Package Combinations",
    ylab = "Frequency",
    axisnames = FALSE,
    main = paste("Distribution of Packages for", node_count, "nodes")
  ))
}

# On-screen version.
plot_package_distribution(x2, nrow(r))

# Same plot written to a 640x300 px PNG.
png("/Users/soltesz/Downloads/rpm_plpackages_distribution_1.png",
    width = 640,
    height = 300,
    units = "px")
# 1x1 grid, with 1" margins on the bottom/left, 0.5" on the top/right
par(mfrow = c(1, 1))
par(mai = c(1, 1, 0.5, 0.5))
plot_package_distribution(x2, nrow(r))
dev.off()
97
98
99
100 #convert_rpm <- function ( row )
101 #{
102 #       c <- as.character(row$rpms)
103 #       rpm_list <- unlist(strsplit(c, " "))
104 #       rpm_sort <- paste(sort(rpm_list), collapse="::");
105 #       return (rpm_sort);
106 #}
107
108 #s<-convert_rpm(r)
109
110 #for ( row in r[,] )
111 #{
112 #       c <- as.character(row$rpms)
113 #       rpm_list <- unlist(strsplit(c, " "))
114 #       row$rpm_sort <- paste(sort(rpm_list), collapse="::");
115 #
116 #       #for ( rpm in rpm_list ) 
117 #       #{
118 #       #       fields <- unlist(strsplit(rpm, "-"));
119 #       #       s <- sort(fields);
120 #       #}
121 #}
122 #
123 #s<-sort(rpm_list);
124
125