--- /dev/null
+#####
+
+#system("URL='https://monitor.planet-lab.org:443/monitor/query?object=nodes&nodehistory_hostname=&hostname=on&observed_status=on&rpms=on&rpmvalue=planetlab&tg_format=plain'; curl -s --insecure $URL | grep -v DOWN | grep -v DEBUG | /usr/share/monitor/statistics/hn2rpms.py > out_rpm.csv");
+#system("grep MD5SUMS /usr/share/monitor/monitor.log | grep -v measurement-lab | awk 'BEGIN { printf \"hostname,yumsum\\n\" } {if ( $3 != \"\") { printf \"%s,%s\\n\", $2,$3 } }' > yumsum.csv")
+
+r <- read.csv("out_rpm.csv")
+ys<- read.csv('yumsum.csv')
+m<-merge(r,ys, by="hostname")
+
+s<-table(factor(r$NodeManager), factor(r$kernel), factor(r$iptables));
+plot(s);
+
+ideal<-c(NodeManager='NodeManager-1.8-12.planetlab.1',
+ NodeUpdate='NodeUpdate-0.5-4.planetlab',
+ codemux='codemux-0.1-13.planetlab',
+ fprobe.ulog='fprobe-ulog-1.1.3-0.planetlab',
+ ipod='ipod-2.2-1.planetlab',
+ iproute='iproute-2.6.16-2.planetlab',
+ iptables='iptables-1.3.8-9.planetlab',
+ kernel='kernel-2.6.22.19-vs2.3.0.34.39.planetlab',
+ madwifi='madwifi-0.9.4-2.6.22.19.3.planetlab',
+ monitor.client='monitor-client-3.0-17.planetlab',
+ monitor.runlevelagent='monitor-runlevelagent-3.0-17.planetlab',
+ pl_mom='pl_mom-2.3-1.planetlab',
+ pl_sshd='pl_sshd-1.0-11.planetlab',
+ pyplnet='pyplnet-4.3-3.planetlab',
+ util.vserver.pl='util-vserver-pl-0.3-17.planetlab',
+ vserver.planetlab.f8.i386='vserver-planetlab-f8-i386-4.2-12.2009.06.23',
+ vserver.systemslices.planetlab.f8.i386='vserver-systemslices-planetlab-f8-i386-4.2-12.2009.06.23',
+ vsys='vsys-0.9-3.planetlab',
+ vsys.scripts='vsys-scripts-0.95-11.planetlab');
+
+r_summary <- lapply(r[,4:23], summary)
+for (i in 1:length(r_summary))
+{
+ n<-sort(unlist(r_summary[i]), decreasing=TRUE)
+ names(n[1])
+}
+
+as.numeric(factor(ideal[1], levels(r$NodeManager)))
+
+cv <- function ( row , rows=566, start_col=4, end_col=23, ref=NULL)
+{
+ ret<-NULL;
+ for ( i in 1:rows )
+ {
+ r_l <-NULL
+ for ( name in names(row) )
+ {
+ # NOTE: this doesn't work unless the levels in row are a subset of ref's levels.
+ x<-as.numeric(factor(row[i,name], levels(factor(unlist(row[name])))));
+ r_l <- c(r_l, x);
+ }
+ #r<-as.numeric(row[i,start_col:end_col]);
+ str<- paste(as.character(r_l), collapse="-", sep="-");
+ ret<- rbind(ret, str);
+ }
+ return (ret);
+}
+
+grow <- function (d, column, val)
+{
+ r <- which(d[column] == val);
+ return (d[r,]);
+}
+
+cv(m, length(m$hostname));
+i<-data.frame(t(ideal));
+cv(i, 1, 1, length(ideal));
+
+ # ---
+
+x<-cv(r, length(r$hostname))
+x2<-factor(x)
+# plot the frequency of each RPM package combination
+barplot(sort(table(x2), decreasing=TRUE),
+ ylim=c(0, max(table(x2))),
+ xlab="Unique Package Combinations",
+ ylab="Frequency",
+ axisnames=FALSE,
+ main=paste("Distribution of Packages for", length(r$hostname),"nodes"));
+
+png("/Users/soltesz/Downloads/rpm_plpackages_distribution_1.png",
+ width=640,
+ height=300,
+ unit="px")
+# 1x1 grid, with 1" margins on the bottom/left, 0.5" on the top/right
+par(mfrow=c(1,1));
+par(mai=c(1,1,0.5,0.5));
+barplot(sort(table(x2), decreasing=TRUE),
+ ylim=c(0, max(table(x2))),
+ xlab="Unique Package Combinations",
+ ylab="Frequency",
+ axisnames=FALSE,
+ main=paste("Distribution of Packages for", length(r$hostname),"nodes"));
+dev.off()
+
+
+
+#convert_rpm <- function ( row )
+#{
+# c <- as.character(row$rpms)
+# rpm_list <- unlist(strsplit(c, " "))
+# rpm_sort <- paste(sort(rpm_list), collapse="::");
+# return (rpm_sort);
+#}
+
+#s<-convert_rpm(r)
+
+#for ( row in r[,] )
+#{
+# c <- as.character(row$rpms)
+# rpm_list <- unlist(strsplit(c, " "))
+# row$rpm_sort <- paste(sort(rpm_list), collapse="::");
+#
+# #for ( rpm in rpm_list )
+# #{
+# # fields <- unlist(strsplit(rpm, "-"));
+# # s <- sort(fields);
+# #}
+#}
+#
+#s<-sort(rpm_list);
+
+