+ ##########################################################################
+ ##########################################################################
+ ##########################################################################
+
+ # NOTE: Check that we're not stuck in a loop: the 0-CycleCheck service runs check_cycle with the argument "rebootlog".
+ print Service(use="planetlab-service",
+ host_name=hn_list,
+ service_description="0-CycleCheck",
+ notifications_enabled="1",
+ display_name="0-CycleCheck",
+ check_command="check_cycle!rebootlog").toString()
+ # NOTE: If we are in a loop, then let someone know: notify the "admins" contact group on critical or warning states.
+ print ServiceEscalation(host_name=hn_list,
+ service_description="0-CycleCheck",
+ first_notification=1,  # applies from the very first notification
+ last_notification=0,  # 0 = no upper bound (Nagios escalation semantics)
+ notification_interval=email_notification_interval,
+ escalation_options="c,w",
+ contact_groups="admins").toString()
+ # NOTE: Stop other escalations if the CycleCheck fails: while 0-CycleCheck is critical or warning, checks of and notifications for the dependent aSSH and bRUNLEVEL services are suppressed.
+ print ServiceDependency(
+ host_name=hn_list,
+ service_description="0-CycleCheck",
+ dependent_host_name=hn_list,  # NOTE(review): if hn_list names several hosts, verify this does not create cross-host dependencies
+ dependent_service_description="aSSH",
+ execution_failure_criteria='c,w',
+ notification_failure_criteria="c,w").toString()
+ print ServiceDependency(
+ host_name=hn_list,
+ service_description="0-CycleCheck",
+ dependent_host_name=hn_list,
+ dependent_service_description="bRUNLEVEL",
+ execution_failure_criteria='c,w',
+ notification_failure_criteria="c,w").toString()
+
+ # NOTE: define services that run on the host. aSSH runs check_ssh with the arguments "-t 120" and belongs to the NET and SSH servicegroups.
+ print Service(use="planetlab-service",
+ host_name=hn_list,
+ service_description="aSSH",
+ notifications_enabled="1",
+ display_name="aSSH",
+ servicegroups="NET,SSH",
+ check_command="check_ssh!-t 120").toString()
+ # NOTE: before sending any notices, attempt to reboot the host twice: notifications 1 and 2 for a critical aSSH are routed to automate-host-reboot-contact.
+ print ServiceEscalation(host_name=hn_list,
+ service_description="aSSH",
+ first_notification=1,
+ last_notification=2,
+ notification_interval=action_notification_interval,
+ escalation_options="c",
+ contacts="automate-host-reboot-contact").toString()
+ # NOTE: after trying to reboot the node, send periodic notices regarding this host being down.
+ # Even if the site is not down, some notice should go out: from notification 3 onward the "<lb>-techs" contact group is notified (critical, warning, recovery) at twice the email interval.
+ print ServiceEscalation( host_name=hn_list,
+ service_description="aSSH",
+ first_notification=3,
+ last_notification=0,  # 0 = no upper bound (Nagios escalation semantics)
+ notification_interval=email_notification_interval*2,
+ escalation_options="c,w,r",
+ contact_groups="%s-techs" % lb).toString()
+
+ #print Service(use="planetlab-service",
+ # host_name=hn_list,
+ # service_description="cPCU",
+ # notes_url="%s/db/sites/index.php?id=%s" % (auth.www, site['site_id']),
+ # display_name="cPCU",
+ # servicegroups="NET,PCU",
+ # notifications_enabled="0",
+ # check_command="check_pcu").toString()
+ #print ServiceDependency(
+ # host_name="boot.planet-lab.org",
+ # service_description="API",
+ # dependent_host_name=hn_list,
+ # dependent_service_description="cPCU",
+ # execution_failure_criteria='c,w',
+ # notification_failure_criteria="c,w").toString()
+ #print ServiceEscalation( host_name=hn_list,
+ # service_description="cPCU",
+ # first_notification=1,
+ # last_notification=0,
+ # notification_interval=40, # 24*60*.5,
+ # escalation_options="w,c,r",
+ # contact_groups="%s-techs" % lb).toString()
+
+ print Service(use="planetlab-service",  # bRUNLEVEL: runs check_mode; member of the NET and RUNLEVEL servicegroups
+ host_name=hn_list,
+ service_description="bRUNLEVEL",
+ display_name="bRUNLEVEL",
+ servicegroups="NET,RUNLEVEL",
+ notifications_enabled="1",
+ check_command="check_mode").toString()
+ # NOTE: check runlevel cannot run without the API, so bRUNLEVEL depends on the "API" service of boot.planet-lab.org (critical or warning there suppresses checks and notifications here).
+ print ServiceDependency(
+ host_name="boot.planet-lab.org",
+ service_description="API",
+ dependent_host_name=hn_list,
+ dependent_service_description="bRUNLEVEL",
+ execution_failure_criteria='c,w',
+ notification_failure_criteria="c,w").toString()
+ # NOTE: a critical result from check_mode probably means the host is offline; a warning is repairable.
+ # NOTE: try to repair the host if it is online and 'mode' indicates a problem: only the warning state ("w") is escalated, to automate-service-repair-contact.
+ print ServiceEscalation(host_name=hn_list,
+ service_description="bRUNLEVEL",
+ first_notification=1,
+ last_notification=0,  # 0 = no upper bound (Nagios escalation semantics)
+ escalation_options="w",
+ notification_interval=action_notification_interval,
+ contacts="automate-service-repair-contact").toString()