convert some sites and users into a nagios configuration
[monitor.git] / tools / plc_hosts_to_nagios.py
1 #!/usr/bin/python
2 from nagiosobjects import *
3
# Name of the notification command; the contact below refers to it through
# host_notification_commands, so both definitions share one value.
reboot_command_name = "automate-host-reboot-command"

# Notification command that invokes the monitor reboot script, passing the
# $NOTIFICATIONTYPE$ and $HOSTNAME$ macros as arguments.
command_auto = Command(
    command_name=reboot_command_name,
    command_line="/usr/share/monitor/commands/reboot.py $NOTIFICATIONTYPE$ $HOSTNAME$",
)

# Contact used only to trigger the reboot command: host notifications are
# enabled (options "d,r"), service notifications are disabled.
# NOTE(review): "not.an.email" is presumably a placeholder because this
# contact is never meant to receive mail -- confirm.
contact_auto = Contact(
    contact_name="automate-host-reboot-contact",
    host_notifications_enabled=1,
    service_notifications_enabled=0,
    host_notification_period="24x7",
    host_notification_options="d,r",
    host_notification_commands=reboot_command_name,
    service_notification_period="24x7",
    service_notification_commands="monitor-notify-service-by-email",
    email="not.an.email",
)

# Emit both definitions, command first.
for nagios_object in (command_auto, contact_auto):
    print(nagios_object.toString())
19
# (servicegroup_name, alias) pairs for the service groups shared by all hosts.
service_group_specs = [
    ('NET', "Network Services"),
    ('SSH', "SSH Service"),
    ('SSH806', "Auxiliary SSH Service"),
    ('HTTP', "PlanetFlow HTTP"),
    ('COTOP', "HTTP based COTOP"),
    # Disabled entries, kept from the original list:
    #('PLSOFT', "PlanetLab Software"),
    #('MGMT',  "Remote Management"),
]

# One ServiceGroup object per pair, in the order listed above.
globalservices = [
    ServiceGroup(servicegroup_name=group_name, alias=group_alias)
    for (group_name, group_alias) in service_group_specs
]
30
31
# NOTE: ping is not a reliable check in the wide area, so 'check_ssh' is used
#       to decide whether a host is minimally online.  If port 22 cannot be
#       reached, the host is considered DOWN.

# Host template "planetlab-host"; the per-node Host objects emitted later in
# this script inherit from it through use="planetlab-host".
planetlab_host_template = Host(
    name="planetlab-host",
    use="generic-host",
    check_period="24x7",
    check_interval="120",
    retry_interval="10",
    max_check_attempts="6",
    check_command="check_ssh!-t 120",
    contact_groups="admins",
    register="0",
)
globalhost = [planetlab_host_template]

# Emit the host template first, then every service group.
for nagios_object in globalhost + globalservices:
    print(nagios_object.toString())
48
49 from monitor.wrapper import plc
50 from monitor.generic import *
51
# Sites to export, selected by login_base.
l_sites = plc.api.GetSites({'login_base': ['asu', 'gmu']})
# Alternative selection by site id, kept for reference:
#l_sites = plc.api.GetSites([10243, 22, 10247, 138, 139, 10050, 10257, 18, 20,
#                            21, 10134, 24, 10138, 10141, 30, 31, 33, 10279, 41, 29, 10193, 10064, 81,
#                            10194, 10067, 87, 10208, 10001, 233, 157, 10100, 10107])

# Flatten the per-site 'node_ids' lists into a single list of ints.
# The previous join/split round trip raised ValueError on int('') as soon as
# one site had no nodes (or no site was returned at all); iterating the lists
# directly handles empty lists naturally.
node_ids = [int(node_id) for site in l_sites for node_id in site['node_ids']]

# Only query the API when there is something to look up.
# NOTE(review): how GetNodes treats an empty id list is not visible here, so
# the call is skipped rather than relying on it -- confirm against PLCAPI.
if node_ids:
    l_nodes = plc.api.GetNodes(node_ids)
else:
    l_nodes = []
64
# Build the lookup tables used by the per-site loop below.
# NOTE(review): the helpers are presumably provided by the star import of
# monitor.generic; their exact return shapes are not visible here.  What this
# script relies on: lb2hn is indexed by login_base and yields that site's node
# records, and netid2ip is indexed by str(interface_id) and yields records
# with an 'ip' field.
d_sites, id2lb = dsites_from_lsites_id(l_sites)
plcdb, hn2lb, lb2hn = dsn_from_dsln(d_sites, id2lb, l_nodes)

l_interfaces = plc.api.GetInterfaces()
netid2ip = d_from_l(l_interfaces, 'interface_id')
69
# For every site: emit a HostGroup, one Host per node that has an interface,
# and the HostEscalations covering all of the site's emitted hosts.
for site in l_sites:
    lb = site['login_base']
    shortname = site['abbreviated_name']
    hg = HostGroup(hostgroup_name=lb, alias=shortname)

    # Status-map coordinates are the same for every node of a site, so they
    # are computed once here.  Sites without a position fall back to "0,0".
    # (This replaces the old -1 sentinels that were tested with
    # `is not -1`, an identity comparison on ints that only works by
    # accident of CPython's small-integer cache.)
    lat = site['latitude']
    lon = site['longitude']
    if lat is not None and lon is not None:
        scale = 5
        # NOTE(review): int() truncates before scaling, so coordinates are
        # always multiples of `scale`; kept as-is to preserve output.
        lon_x = int(180 + lon) * scale
        lat_y = int(180 - (lat + 90)) * scale
        coords = "%s,%s" % (lon_x, lat_y)
    else:
        coords = "0,0"

    # Skip sites that have no known nodes.
    if lb not in lb2hn:
        continue
    nodes = lb2hn[lb]
    if not nodes:
        continue

    print(hg.toString())

    hostname_list = []
    for node in nodes:
        hn = node['hostname']
        # A node without any interface has no address to monitor.
        if not node['interface_ids']:
            continue

        # The node's first interface supplies the host address.
        ip = netid2ip[str(node['interface_ids'][0])]['ip']

        # NOTE(review): the keyword is spelled d2_coords, presumably because
        # the Nagios directive name "2d_coords" is not a valid Python
        # identifier -- confirm in nagiosobjects.
        h = Host(use="planetlab-host",
                 host_name=hn,
                 alias=hn,
                 address=ip,
                 d2_coords=coords,
                 statusmap_image="icon-system.png",
                 hostgroups=lb)

        print(h.toString())

        hostname_list.append(hn)

    # NOTE: use all hostnames at site to create HostEscalations for
    #       down-notices; nothing to escalate when no host was emitted.
    if not hostname_list:
        continue

    hn_list = ",".join(hostname_list)

    # (first_notification, contact group suffix), emitted in this order:
    #   techs      -- always receive notices (from the 3rd notification on)
    #   pis        -- join at the 5th notification (2 prior tech notices);
    #                 the original note describes this as "after a week"
    #   sliceusers -- join at the 7th notification (4 prior notices);
    #                 the original note describes this as "after two weeks"
    escalation_steps = [
        (3, "techs"),
        (5, "pis"),
        (7, "sliceusers"),
    ]
    for (first_notification, group_suffix) in escalation_steps:
        he = HostEscalation(host_name=hn_list,
                            first_notification=first_notification,
                            last_notification=0,
                            notification_interval=24*60*1,
                            escalation_options="r,d",
                            contact_groups="%s-%s" % (lb, group_suffix))
        print(he.toString())

    # Escalation that hands the 2nd "down" notification (and only that one)
    # to the automated reboot contact.
    # NOTE(review): 24*60*0.5 evaluates to the float 720.0; kept unchanged so
    # the generated value stays the same.
    he_reboot = HostEscalation(host_name=hn_list,
                               first_notification=2,
                               last_notification=2,
                               notification_interval=24*60*0.5,
                               escalation_options="d",
                               contacts="automate-host-reboot-contact")

    print(he_reboot.toString())
157
158
# The service definitions below apply to every host (host_name="*"), so they
# are emitted once, and only when at least one Host has been emitted.
#
# The condition is recomputed from the site/node data instead of reading the
# `hostname_list` left over from the per-site loop: that variable only
# describes the LAST site processed (so services were dropped whenever that
# site had no usable node) and does not exist at all when no site got that
# far (NameError).  The rule mirrors the loop: a Host is emitted for a node
# only if the node has at least one interface.
have_hosts = any(node['interface_ids']
                 for site in l_sites
                 if site['login_base'] in lb2hn
                 for node in lb2hn[site['login_base']])

if have_hosts:
    # (service_description / display_name, servicegroups, check_command)
    service_specs = [
        ("aSSH",    "NET,SSH",    "check_ssh!-t 120"),
        ("bSSH806", "NET,SSH806", "check_ssh!-p 806 -t 120"),
        ("cHTTP",   "NET,HTTP",   "check_http!-t 120"),
        ("dCOTOP",  "NET,COTOP",  "check_http!-p 3120 -t 120"),
    ]
    for (description, groups, command) in service_specs:
        svc = Service(use="generic-service",
                      host_name="*",
                      service_description=description,
                      display_name=description,
                      servicegroups=groups,
                      notifications_enabled="0",
                      check_command=command)
        print(svc.toString())

    # The auxiliary checks depend on the plain SSH check.
    # NOTE(review): per the Nagios object documentation, "w,u,c,p" should
    # suppress execution of the dependent checks while aSSH is in a warning,
    # unknown, critical or pending state -- confirm.
    sd1 = ServiceDependency(host_name="*",
                            service_description="aSSH",
                            dependent_service_description="bSSH806,cHTTP,dCOTOP",
                            execution_failure_criteria="w,u,c,p",)
    print(sd1.toString())
201