2 from nagiosobjects import *
# Notification command that runs the automated reboot script, passing the
# $NOTIFICATIONTYPE$ and $HOSTNAME$ placeholders as its two arguments.
4 command_auto = Command(command_name="automate-host-reboot-command",
5 command_line="""/usr/share/monitor/commands/reboot.py $NOTIFICATIONTYPE$ $HOSTNAME$""")
# Contact that routes host notifications to the reboot command above: host
# notifications are enabled (options "d,r"), service notifications are disabled.
# NOTE(review): the closing of this Contact(...) call is not visible in this
# view of the file.
7 contact_auto = Contact(contact_name="automate-host-reboot-contact",
8 host_notifications_enabled=1,
9 service_notifications_enabled=0,
10 host_notification_period="24x7",
11 host_notification_options="d,r",
12 host_notification_commands="automate-host-reboot-command",
13 service_notification_period="24x7",
14 service_notification_commands="monitor-notify-service-by-email",
# Print the text form of both objects.
17 print command_auto.toString()
18 print contact_auto.toString()
# Build one ServiceGroup per (servicegroup_name, alias) pair and append it to
# globalservices. The PLSOFT and MGMT groups are currently commented out.
# NOTE(review): the line that closes the list literal and the `for` header is
# not visible in this view of the file.
21 for service in [('NET', "Network Services"),
22 ('SSH', "SSH Service"),
23 ('SSH806', "Auxiliary SSH Service"),
24 ('HTTP', "PlanetFlow HTTP"),
25 ('COTOP', "HTTP based COTOP"),
27 #('PLSOFT', "PlanetLab Software"),
28 #('MGMT', "Remote Management")]:
29 globalservices.append(ServiceGroup(servicegroup_name=service[0], alias=service[1]))
32 # NOTE: since ping is not a reliable check in the wide area, use 'check_ssh'
33 # to determine if the host is minimally online. If we cannot access
34 # port 22 on it, then the host is considered DOWN.
# Host template named "planetlab-host": checked with check_ssh (-t 120), up to
# 6 check attempts, contact group "admins".
# NOTE(review): several Host(...) arguments and the closing of the list are
# not visible in this view of the file.
36 globalhost = [Host( name="planetlab-host",
41 max_check_attempts="6",
42 check_command="check_ssh!-t 120",
43 contact_groups="admins",
# Iterate over the host template(s) followed by the service groups; the loop
# body is not visible here (presumably it prints each object — TODO confirm).
46 for obj in globalhost + globalservices:
49 from monitor.wrapper import plc
50 from monitor.generic import *
52 l_sites = plc.api.GetSites({'login_base' : ['asu', 'gmu']})
53 #l_sites = plc.api.GetSites([10243, 22, 10247, 138, 139, 10050, 10257, 18, 20,
54 # 21, 10134, 24, 10138, 10141, 30, 31, 33, 10279, 41, 29, 10193, 10064, 81,
55 # 10194, 10067, 87, 10208, 10001, 233, 157, 10100, 10107])
57 node_ids = [ s['node_ids'] for s in l_sites ]
58 node_ids = [ map(str,n) for n in node_ids ]
59 node_ids = [ ",".join(n) for n in node_ids ]
60 node_ids = ",".join(node_ids)
61 node_ids = map(int, node_ids.split(","))
63 l_nodes = plc.api.GetNodes(node_ids)
65 (d_sites,id2lb) = dsites_from_lsites_id(l_sites)
66 (plcdb, hn2lb, lb2hn) = dsn_from_dsln(d_sites, id2lb, l_nodes)
68 netid2ip = d_from_l(plc.api.GetInterfaces(), 'interface_id')
71 shortname = site['abbreviated_name']
72 lb = site['login_base']
73 hg = HostGroup(hostgroup_name=lb, alias=shortname)
74 lat = site['latitude']
75 lon = site['longitude']
78 if lat is not None and lon is not None:
80 lon_x = int(180 + lon) * scale
81 lat_y = int(180 - (lat + 90)) * scale
83 if site['login_base'] in lb2hn:
84 nodes = lb2hn[site['login_base']]
96 if len(node['interface_ids']) == 0:
99 ip = netid2ip[str(node['interface_ids'][0])]['ip']
101 if lon_x is not -1 and lat_y is not -1:
102 coords="%s,%s" % (lon_x, lat_y)
106 h = Host(use="planetlab-host",
111 statusmap_image="icon-system.png",
116 hostname_list.append(hn)
# Build the escalation chain for all hosts of the current site. Each
# HostEscalation covers the comma-joined host list, uses escalation options
# "r,d" and a notification_interval of 24*60*1, and differs only in its
# first_notification (3, 5, 7) and contact group (<lb>-techs, <lb>-pis,
# <lb>-sliceusers).
# NOTE(review): at least one argument of each HostEscalation(...) call is not
# visible in this view of the file.
118 # NOTE: use all hostnames at site to create HostEscalations for down-notices
119 if len(hostname_list) > 0:
121 hn_list = ",".join(hostname_list)
122 # NOTE: always send notices to techs
123 he1 = HostEscalation( host_name=hn_list,
124 first_notification=3,
126 notification_interval=24*60*1,
127 escalation_options="r,d",
128 contact_groups="%s-techs" % lb)
130 # NOTE: only send notices to PIs after a week. (2 prior notices)
131 he2 = HostEscalation( host_name=hn_list,
132 first_notification=5,
134 notification_interval=24*60*1,
135 escalation_options="r,d",
136 contact_groups="%s-pis" % lb)
138 # NOTE: send notices to Slice users after two weeks. (4 prior notices)
139 he3 = HostEscalation( host_name=hn_list,
140 first_notification=7,
142 notification_interval=24*60*1,
143 escalation_options="r,d",
144 contact_groups="%s-sliceusers" % lb)
# The body of this loop is not visible here (presumably it prints each
# escalation -- TODO confirm).
146 for he in [he1, he2, he3]:
# Escalation that hands the host to the automated reboot contact: it starts at
# notification 2 and uses escalation option "d" only.
# NOTE(review): 24*60*0.5 evaluates to the float 720.0, unlike the integer
# intervals above -- confirm toString() renders it acceptably.
149 he_reboot = HostEscalation(host_name=hn_list,
150 first_notification=2,
152 notification_interval=24*60*0.5,
153 escalation_options="d",
154 contacts="automate-host-reboot-contact")
156 print he_reboot.toString()
# Define the per-site service checks. All four services use the
# "generic-service" template and have notifications disabled:
#   aSSH    - check_ssh, 120 s timeout option
#   bSSH806 - check_ssh on port 806
#   cHTTP   - check_http
#   dCOTOP  - check_http on port 3120
# NOTE(review): at least one argument of each Service(...) call is not visible
# in this view of the file; `hn` is presumably passed as host_name -- TODO confirm.
159 if len(hostname_list) > 0:
160 hn = ",".join(hostname_list)
162 s1 = Service(use="generic-service",
164 service_description="aSSH",
166 servicegroups="NET,SSH",
167 notifications_enabled="0",
168 check_command="check_ssh!-t 120")
169 s2 = Service(use="generic-service",
171 service_description="bSSH806",
172 display_name="bSSH806",
173 servicegroups="NET,SSH806",
174 notifications_enabled="0",
175 check_command="check_ssh!-p 806 -t 120")
176 s3 = Service(use="generic-service",
178 service_description="cHTTP",
179 display_name="cHTTP",
180 servicegroups="NET,HTTP",
181 notifications_enabled="0",
182 check_command="check_http!-t 120")
183 s4 = Service(use="generic-service",
185 service_description="dCOTOP",
186 display_name="dCOTOP",
187 servicegroups="NET,COTOP",
188 notifications_enabled="0",
189 check_command="check_http!-p 3120 -t 120")
# Make bSSH806, cHTTP and dCOTOP depend on aSSH for every host ("*"), with
# execution_failure_criteria "w,u,c,p".
194 sd1 = ServiceDependency(host_name="*",
195 service_description="aSSH",
196 dependent_service_description="bSSH806,cHTTP,dCOTOP",
197 execution_failure_criteria="w,u,c,p",)
# Print the text form of every service and of the dependency.
199 for service in [s1,s2,s3,s4,sd1]:
200 print service.toString()