- support joins and wheres in config file
[plcapi.git] / tools / dzombie.py
1 #!/usr/bin/python
2 #
3 # Tool that removes zombie records from database tables#
4 import sys
5 import os
6 import getopt
7 import pgdb
8 from pprint import pprint
9
10 schema_file = None
11 config_file = "/etc/planetlab/plc_config"
12 config = {}
13 execfile(config_file, config)
14
15 def usage():
16         print "Usage: %s SCHEMA_FILE " % sys.argv[0]
17         sys.exit(1)
18
19 try:
20         schema_file  = sys.argv[1]
21 except IndexError:
22         print "Error: too few arguments"
23         usage()
24
25 # all foreing keys exist as primary kyes in another table
26 # will represent all foreign keys as
27 # { 'table.foreign_key': 'table.primary_key'} 
28 foreign_keys = {}
29 foreign_keys_ordered = []
30 zombie_keys = {}
31 # parse the schema for foreign keys
32 try:
33         file = open(schema_file, 'r')
34         index = 0
35         lines = file.readlines()
36         while index < len(lines):
37                 line = lines[index].strip()
38                 # find all created objects
39                 if line.startswith("CREATE"):
40                         item_type = line.split(" ")[1].strip()
41                         item_name = line.split(" ")[2].strip()
42                         if item_type.upper() in ['TABLE']:
43                                 while index < len(lines):
44                                         index = index + 1
45                                         nextline =lines[index].strip()
46                                         if nextline.find("--") > -1:
47                                                 nextline = nextline[0:nextline.index("--")].replace(',', '')
48                                         if nextline.upper().find("REFERENCES") > -1:
49                                                 nextline_parts = nextline.split(" ")
50                                                 foreign_key_name = nextline_parts[0].strip()
51                                                 foreign_key_table = nextline_parts[nextline_parts.index("REFERENCES")+1].strip()
52                                                 foreign_key = item_name + "."+ foreign_key_name
53                                                 primary_key = foreign_key_table +"."+ foreign_key_name 
54                                                 foreign_keys[foreign_key] = primary_key
55                                                 foreign_keys_ordered.append(foreign_key)
56                                         elif nextline.find(";") >= 0:
57                                                 break
58                 index = index + 1
59 except:
60         raise
61
62 db = pgdb.connect(user = config['PLC_DB_USER'],
63                   database = config['PLC_DB_NAME'])
64 cursor = db.cursor()
65 try:
66         for foreign_key in foreign_keys_ordered:
67                 primary_key = foreign_keys[foreign_key]
68                 sql = "SELECT distinct %s from %s"
69                 
70                 # get all foreign keys in this table
71                 foreign_key_parts = foreign_key.split(".")
72         
73                 # do not delete from primary tables
74                 if foreign_key_parts[0] in ['addresses', 'boot_states', 'conf_files', \
75                         'keys', 'messages', 'nodegroups', 'nodenetworks', 'nodes', 'pcus', 'peers' \
76                         'persons', 'roles', 'sessions', 'sites', 'slices']:
77                         #print "skipping table %s" % foreign_key_parts[0] 
78                         continue
79
80                 cursor.execute(sql % (foreign_key_parts[1], foreign_key_parts[0]))
81                 foreign_rows = cursor.fetchall()
82                                 
83                 # get all the primary keys from this foreign key's primary table 
84                 primary_key_parts = primary_key.split(".")
85                 # foreign key name may not match primary key name. must rename these
86                 if primary_key_parts[1] == 'creator_person_id':
87                         primary_key_parts[1] = 'person_id'
88                 elif primary_key_parts[1] == 'min_role_id':
89                         primary_key_parts[1]  = 'role_id'
90                 sql = sql % (primary_key_parts[1], primary_key_parts[0])
91                 
92                 # determin which primary records are deleted
93                 desc = os.popen('psql planetlab4 postgres -c "\d %s;"' % primary_key_parts[0])
94                 result = desc.readlines()
95                 if primary_key_parts[0] in ['slices']:
96                         sql  = sql + " where name not like '%_deleted'"
97                 elif filter(lambda line: line.find("deleted") > -1, result):
98                         sql = sql + " where deleted = false"
99
100                 cursor.execute(sql)
101                 primary_key_rows = cursor.fetchall()
102                 
103                 # if foreign key isnt present in primay_key query, it either doesnt exist or marked as deleted
104                 # also, ignore null foreign keys, not considered zombied
105                 zombie_keys_func = lambda key: key not in primary_key_rows and not key == [None]
106                 zombie_keys_list = [zombie_key[0] for zombie_key in filter(zombie_keys_func, foreign_rows)]
107                 print zombie_keys_list
108                 # delete these zombie records
109                 if zombie_keys_list:
110                         print " -> Deleting %d zombie record(s) from %s after checking %s" % \
111                                         (len(zombie_keys_list), foreign_key[0], primary_key[0])
112                         sql_delete = 'DELETE FROM %s WHERE %s IN %s' % \
113                         (foreign_key_parts[0], foreign_key_parts[1], tuple(zombie_keys_list))
114                         cursor.execute(sql_delete)
115                         db.commit()
116                 #zombie_keys[foreign_key] = zombie_keys_list
117         print "done"
118 except pgdb.DatabaseError:
119         raise