clearer names for actions, and infer actions better
[monitor.git] / nagios / plc_to_nagios.py
1 #!/usr/bin/python
2
3 import plc
4 from plc_config import *
5 from nagiosobjects import *
6 from generic import *
7 import auth
8 import socket
9
# Emit the Nagios command object that services reference as "check_plc_api".
# $HOSTNAME$ is left verbatim in the command line for Nagios to substitute.
print(Command(
    command_name="check_plc_api",
    command_line="/usr/share/monitor/nagios/plugins/checkplc.py -H $HOSTNAME$ "
).toString())
12
13 #print Command(command_name="check_plc_web",
14 #              command_line="""/usr/share/monitor/nagios/plugins/checkplc.py -H $HOSTNAME$ --sn $SERVICENOTIFICATIONNUMBER$ """).toString()
15
16 #print Command(command_name="check_plc_db",
17 #              command_line="""/usr/share/monitor/nagios/plugins/checkplc.py -H $HOSTNAME$ --sn $SERVICENOTIFICATIONNUMBER$ """).toString()
18
19
# Shared definitions carrying register="0"; the concrete Host and Service
# objects emitted later in this script pull them in through `use=`.

# Host settings referenced later as use="planetlab-server".
_server_host_template = Host(
    name="planetlab-server",
    use="generic-host",
    check_period="24x7",
    check_interval="120",
    retry_interval="10",
    max_check_attempts="6",
    check_command="check_http",
    # Alternative value: 60*24*.5, i.e. wait half a day before taking any action.
    first_notification_delay=0,
    contact_groups="admins",
    register="0")

# Service settings referenced later as use="planetlab-server-service".
_server_service_template = Service(
    name="planetlab-server-service",
    active_checks_enabled="1",
    passive_checks_enabled="1",
    parallelize_check="1",
    obsess_over_service="1",
    check_freshness="0",
    notifications_enabled="1",
    event_handler_enabled="1",
    flap_detection_enabled="1",
    failure_prediction_enabled="1",
    process_perf_data="1",
    retain_status_information="1",
    retain_nonstatus_information="1",
    is_volatile="0",
    check_period="24x7",
    max_check_attempts="3",
    # NOTE: make this reasonable for N machines.
    normal_check_interval="15",
    retry_check_interval="5",
    notification_options="w,u,c,r",
    notification_interval="60",
    notification_period="24x7",
    contact_groups="admins",
    register="0")

# Despite the name, this list holds both the host and the service template.
globalhost = [_server_host_template, _server_service_template]
55
# One ServiceGroup per (servicegroup_name, alias) pair.
globalservices = [
    ServiceGroup(servicegroup_name=group_name, alias=group_alias)
    for group_name, group_alias in (('HTTP', "HTTP Server"),
                                    ('API', "PLC API"))
]

# Emit the templates first, then the service groups.
for definition in globalhost + globalservices:
    print(definition.toString())
64
# Wider host list, currently disabled:
#plc_hosts = [ PLC_MONITOR_HOST, PLC_WWW_HOST, PLC_BOOT_HOST, PLC_PLANETFLOW_HOST, ]

# Servers to monitor. The PLC_* names are not defined in this file; presumably
# they arrive through one of the star imports (plc_config) -- verify.
plc_hosts = [
    PLC_WWW_HOST,
    PLC_BOOT_HOST,
]

# Hostgroup that every Host emitted for plc_hosts is placed in.
print(HostGroup(hostgroup_name="allplcservers",
                alias="allplcservers").toString())
69
# Emit one concrete Host per PLC server and collect the names so the service
# definition further down can be attached to all of them at once.
hostname_list = []
for host in plc_hosts:
    # The address is resolved when the config is generated; a failed lookup
    # raises from socket.gethostbyname and aborts the script at this point.
    ip = socket.gethostbyname(host)

    h = Host(use="planetlab-server",
             host_name=host,
             alias=host,
             address=ip,
             hostgroups="allplcservers")

    print(h.toString())

    hostname_list.append(host)
84     
# Attach a single "API" service check to all PLC hosts collected in
# hostname_list; nothing is emitted when that list is empty.
if hostname_list:

    # All host names joined into one comma-separated host_name value.
    hn_list = ",".join(hostname_list)

    # NOTE(review): servicegroup "NET" is not defined in this script (only
    # HTTP and API are) -- confirm it is declared elsewhere in the Nagios config.
    s1 = Service(
        use="planetlab-server-service",
        host_name=hn_list,
        service_description="API",
        display_name="API",
        servicegroups="NET,API",
        check_command="check_plc_api")

    ## NOTE: try to repair the host, if it is online and 'mode' indicates a problem
    #se1 = ServiceEscalation(host_name=hn_list,
    #                        service_description="bRUNLEVEL",
    #                        first_notification=1,
    #                        last_notification=0,
    #                        escalation_options="w,c,r",
    #                        notification_interval=20,
    #                        contacts="automate-service-repair-contact")

    # Kept as a loop so further objects (e.g. the disabled escalation above)
    # can be added to the sequence later.
    for definition in (s1,):
        print(definition.toString())
108