import ssh.pexpect as pexpect
from unified_model import *
from emailTxt import mailtxt
-
+from nodeconfig import network_config_to_str
+import traceback
import monitorconfig
import signal
try:
k = SSHKnownHosts(); k.update(node); k.write(); del k
except:
- import traceback; print traceback.print_exc()
+ print traceback.print_exc()
return False
try:
session = PlanetLabSession(node, config.nosetup, config.verbose)
except Exception, e:
print "ERROR setting up session for %s" % hostname
- import traceback; print traceback.print_exc()
+ print traceback.print_exc()
print e
return False
time.sleep(session.timeout*4)
conn = session.get_connection(config)
except:
- import traceback; print traceback.print_exc()
+ print traceback.print_exc()
return False
('noinstall' , 'notinstalled'),
('bziperror' , 'bzip2: Data integrity error when decompressing.'),
('noblockdev' , "No block devices detected."),
+ ('dnserror' , 'Name or service not known'),
('downloadfail' , 'Unable to download main tarball /boot/bootstrapfs-planetlab-i386.tar.bz2 from server.'),
('disktoosmall' , 'The total usable disk size of all disks is insufficient to be usable as a PlanetLab node.'),
('hardwarerequirefail' , 'Hardware requirements not met'),
for n in ["bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-disk-update4-done",
"bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-netcfg-update3-disk-update4-update3-exception-protoerror-update-protoerror-debug-done",
"bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-implementerror-bootupdatefail-update-debug-done",
+
+ "bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-update3-exception-protoerror-update-protoerror-debug-done",
+
"bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-netcfg-update3-disk-update4-update3-exception-protoerror-update-debug-done",
"bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-netcfg-disk-update4-update3-exception-chrootfail-update-debug-done",
"bminit-cfg-auth-getplc-update-debug-done",
"bminit-cfg-auth-protoerror-exception-update-protoerror-debug-done",
"bminit-cfg-auth-protoerror-exception-update-bootupdatefail-authfail-debug-done",
"bminit-cfg-auth-protoerror-exception-update-debug-done",
+ "bminit-cfg-auth-getplc-exception-protoerror-update-debug-done",
"bminit-cfg-auth-getplc-implementerror-update-debug-done",
]:
sequences.update({n : "restart_bootmanager_boot"})
# broken_hardware_email
sequences.update({"bminit-cfg-auth-getplc-update-hardware-exception-hardwarerequirefail-update-debug-done" : "broken_hardware_email"})
+ # bad_dns_email
+ sequences.update({"bminit-cfg-update-implementerror-bootupdatefail-dnserror-update-implementerror-bootupdatefail-dnserror-done" : "bad_dns_email"})
+
flag_set = True
m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase])
conn.set_nodestate('disable')
+ elif sequences[s] == "bad_dns_email":
+ print "...NOTIFYING OWNERS OF DNS FAILURE on %s!!!" % hostname
+ args = {}
+ try:
+ node = api.GetNodes(hostname)[0]
+ net = api.GetNodeNetworks(node['nodenetwork_ids'])[0]
+ except:
+ print traceback.print_exc()
+ # TODO: api error. skip email, b/c all info is not available,
+ # flag_set will not be recorded.
+ return False
+ nodenet_str = network_config_to_str(net)
+
+ args['hostname'] = hostname
+ args['network_config'] = nodenet_str
+ args['nodenetwork_id'] = net['nodenetwork_id']
+ m = PersistMessage(hostname, mailtxt.baddns[0] % args,
+ mailtxt.baddns[1] % args, True, db='baddns_persistmessages')
+
+ loginbase = plc.siteId(hostname)
+ m.send([policy.PIEMAIL % loginbase, policy.TECHEMAIL % loginbase])
+ conn.set_nodestate('disable')
+
if flag_set:
pflags.setRecentFlag(s)
pflags.save()
help="Extra quiet output messages.")
parser.add_option("", "--verbose", dest="verbose", action="store_true",
help="Extra debug output messages.")
+ parser.add_option("", "--nonet", dest="nonet", action="store_true",
+ help="Do not setup the network, use existing log files to re-run a test pass.")
parser.add_option("", "--collect", dest="collect", action="store_true",
help="No action, just collect dmesg, and bm.log")
parser.add_option("", "--nosetup", dest="nosetup", action="store_true",
donation_nopcu = [ donation_nopcu_one, donation_nopcu_one, donation_nopcu_one ]
donation_down = [ donation_down_one, donation_down_one, donation_down_one ]
+
+
minimalhardware = ("""Hardware requirements not met on PlanetLab host %(hostname)s""",
"""
While trying to automatically recover this machine:
""")
- dns=("""Planetlab node %(hostname)s down.""", """As part of PlanetLab node monitoring, we noticed the DNS servers used by %(hostname)s are not responding to queries.
+ baddns=("""Planetlab node down: broken DNS configuration for %(hostname)s""",
+"""As part of PlanetLab node monitoring, we noticed the DNS servers used by the following machine(s) are not responding to queries.
-Please verify the DNS information used by the node is correct. You can find directions on how to update the node's network information on the PlanetLab Technical Contacts Guid (http://www.planet-lab.org/doc/TechsGuide.php#id268898).
+ %(hostname)s
-Thanks.
+The consequence of this is that the node cannot boot correctly, and is not a functioning part of the PlanetLab network.
+
+To help us return this machine to running order, please verify that the registered DNS servers in the node network configuration are correct.
+
+%(network_config)s
+You may update the node's network information at the link below:
+
+ https://www.planet-lab.org/db/nodes/node_networks.php?id=%(nodenetwork_id)s
+
+If you have any questions, please feel free to contact us at PlanetLab Support (support@planet-lab.org).
+
+Thank you for your help,
-- PlanetLab Central (support@planet-lab.org)
""")
from nodecommon import *
import database
def network_config_to_str(net):
    """Render the static network settings of a node network as text.

    One line is produced per key, in a fixed order, using the layout
    ``"%15s == %s\n"``: the key right-aligned in a 15-character field,
    then `` == ``, then the value, then a newline.

    net -- mapping indexed by the keys listed in ``static_keys`` below;
           any other keys it contains are ignored.

    Returns the concatenated lines as a single string.  A lookup failure
    for a missing key is not caught here (``KeyError`` for a plain dict).
    """
    static_keys = ['method', 'ip', 'gateway', 'network', 'broadcast',
                   'netmask', 'dns1', 'dns2', 'mac', 'is_primary']
    # Format every line first and join once: this avoids repeated string
    # concatenation and no longer shadows the builtin ``str``.
    return "".join(["%15s == %s\n" % (key, net[key]) for key in static_keys])
+
+
def main():
from config import config
fb = database.dbLoad("findbad")
else:
print "%15s == %s" % (k, n[k])
- static_keys = ['method', 'ip', 'gateway', 'network', 'broadcast', 'netmask', 'dns1', 'dns2', 'mac', 'is_primary']
- for k in static_keys:
- print "%15s == %s" % (k, net[k])
+ print network_config_to_str(net)
#for k in net.keys():
# print k, "==" , net[k]