54183c06a36379a4d7ad34a3c18f5b9825d77887
[monitor.git] / statistics / parserpms.py
1 #!/usr/bin/python
2
3 import sys
4 import os
5 import md5
6 import re
7 from monitor.util import file as fileutil
8
# Banner text that main() prints ahead of the option help when the script is
# run without an --input file.
purpose_message="""
  This utility is designed to simplify the task of parsing and generating
  statistics for the number of packages on PlanetLab nodes.
"""
13
def list_to_md5(strlist):
    """Return the hex MD5 digest of the strings in *strlist*, concatenated.

    The items are fed to the hash in iteration order with no separator, so
    the digest depends on both content and order, and ``['ab', 'c']`` hashes
    the same as ``['a', 'bc']``.  Byte strings are hashed as-is; text
    (unicode) items are encoded as UTF-8 first.  An empty list yields the
    digest of the empty string.
    """
    # hashlib supersedes the deprecated ``md5`` module and yields the same
    # digests; imported locally so this function does not depend on the
    # file-level import list.
    import hashlib

    digest = hashlib.md5()
    for item in strlist:
        if not isinstance(item, bytes):
            # Hash functions only accept bytes; pick a fixed encoding rather
            # than relying on an implicit (ASCII-only) conversion.
            item = item.encode("utf-8")
        digest.update(item)

    return digest.hexdigest()
20
def pick_some_rpms(pattern, rpmlist):
    """Return the entries of *rpmlist* in which the regex *pattern* is found.

    Matching uses ``search``, so the pattern may hit anywhere inside an
    entry.  The result is a new list that keeps the input order.
    """
    matcher = re.compile(pattern)
    return [name for name in rpmlist if matcher.search(name)]
28
def main():
    """Group hosts by the set of packages they have installed and report it.

    Command-line options:
      --input FILE      file to read (required); loaded with
                        fileutil.getListFromFile
      --pattern REGEX   regex used to select packages when --disablepackage
                        is given
      --frequency       print one line per distinct package set
      --disablepackage  use --pattern instead of the built-in package list

    Each input line is split on whitespace: the second field is taken as the
    host name and every following field as a package name (the first field
    is ignored).  For every pattern, hosts whose matching packages are
    identical are grouped under the MD5 digest of that package list.  With
    --frequency, each group is printed as ``<host count> <digest> <number of
    matched packages not in the current-package list>``, largest group
    first.  Without --frequency nothing is printed.

    Side effect: stores the parsed options in the module-level ``config``.
    """
    global config

    from optparse import OptionParser
    parser = OptionParser()

    parser.set_defaults(select=None,
                        input=None,
                        frequency=False,
                        package=True,
                        )

    parser.add_option("", "--input", dest="input",
                      help="the input file")
    parser.add_option("", "--pattern", dest="select",
                      help="the pattern to pull out from rpm list")
    parser.add_option("", "--frequency", dest="frequency", action="store_true",
                      help="print the frequency of packages matched by select")
    parser.add_option("", "--disablepackage", dest="package", action="store_false",
                      help="print the frequency of each pl package")
    (config, args) = parser.parse_args()

    # Running with no arguments at all also leaves config.input unset, so
    # this single check covers both cases.
    if config.input is None:
        print(purpose_message)
        parser.print_help()
        sys.exit(1)

    # Without the built-in list the only pattern is --pattern; fail with a
    # usage error instead of handing None to the regex compiler.
    if not config.package and config.select is None:
        parser.error("--disablepackage requires --pattern")

    rpmlist = fileutil.getListFromFile(config.input)

    # Package versions considered current; anything a host reports outside
    # this set is counted in the per-group difference.  Built once as a set
    # because it is subtracted from for every group.
    current_packages = set([
        'NodeManager-1.8-5.planetlab',
        'NodeUpdate-0.5-4.planetlab', 'codemux-0.1-13.planetlab',
        'fprobe-ulog-1.1.3-0.planetlab', 'ipod-2.2-1.planetlab',
        'iproute-2.6.16-2.planetlab', 'iptables-1.3.8-9.planetlab',
        'kernel-2.6.22.19-vs2.3.0.34.28.planetlab',
        'madwifi-0.9.4-2.6.22.19.3.planetlab', 'monitor-1.0-7.planetlab',
        'monitor-client-3.0-10.planetlab',
        'monitor-runlevelagent-3.0-10.planetlab', 'pl_mom-2.3-1.planetlab',
        'pl_sshd-1.0-11.planetlab', 'pyplnet-4.3-2.planetlab',
        'util-vserver-pl-0.3-16.planetlab',
        'vserver-planetlab-f8-i386-4.2-12.2009.05.27',
        'vserver-systemslices-planetlab-f8-i386-4.2-12.2009.05.27',
        'vsys-0.9-3.planetlab', 'vsys-scripts-0.95-3.planetlab',
    ])

    # PL RPMS
    if config.package:
        package_names = [
            'NodeManager',
            'NodeUpdate', 'codemux', 'fprobe', 'ipod',
            'iproute', 'iptables', 'kernel', 'madwifi', 'monitor-client',
            'monitor-runlevelagent', 'monitor', 'oombailout', 'pl_mom',
            'pl_sshd', 'pyplnet', 'util-vserver-pl', 'vserver-planetlab-f8-i386',
            'vserver-systemslices-planetlab-f8-i386', 'vsys-scripts', 'vsys',
        ]
        all_patterns = [".*" + name + ".*" for name in package_names]
    else:
        all_patterns = [config.select]

    for pattern in all_patterns:
        # digest of the matched package list -> {'hosts': [...], 'diff': set}
        return_sums = {}
        for line in rpmlist:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: there is no host field to record.
                continue
            host = fields[1]
            # Sort first so the digest does not depend on listing order.
            rpms = pick_some_rpms(pattern, sorted(fields[2:]))
            if not rpms:
                continue

            digest = list_to_md5(rpms)
            group = return_sums.get(digest)
            if group is None:
                # Hosts sharing a digest share the same matched list, so the
                # difference is computed once, when the group is created.
                group = {'hosts': [], 'diff': set(rpms) - current_packages}
                return_sums[digest] = group
            group['hosts'].append(host)

        if config.frequency:
            #print "Frequency for packages that matched: %s" % pattern
            ranked = [(len(group['hosts']), digest)
                      for digest, group in return_sums.items()]
            # Largest group first; the sort is stable, so groups of equal
            # size keep their dictionary order.
            ranked.sort(key=lambda pair: pair[0], reverse=True)
            for count, digest in ranked:
                print("%s %s %s" % (count, digest,
                                    len(return_sums[digest]['diff'])))
112
if __name__ == "__main__":
    import errno

    try:
        main()
    except IOError as err:
        # A closed stdout pipe (for example when the report is piped into
        # `head`) is an ordinary way for this script to end, so it stays
        # silent.  Every other I/O failure, such as an unreadable input
        # file, is reported instead of being hidden behind exit status 0.
        if err.errno != errno.EPIPE:
            sys.stderr.write("%s: %s\n" % (sys.argv[0], err))
            sys.exit(1)