# TODO: Verify that files and dirs already exist
# TODO: Blacklist nodes!
+# TODO: Unify delays!!
+# TODO: Validate outcome of uploads!!
+
+reschedule_delay = "0.5s"
-DELAY ="1s"
@clsinit
class LinuxNode(ResourceManager):
@property
def home(self):
- return self.get("home") or "/tmp"
+ """ Value of the "home" attribute, or "" when that value is falsy """
+ return self.get("home") or ""
@property
- def exp_dir(self):
- exp_dir = os.path.join(self.home, self.ec.exp_id)
- return exp_dir if exp_dir.startswith('/') else "${HOME}/"
+ def exp_home(self):
+ """ self.home joined with the experiment id (self.ec.exp_id) """
+ return os.path.join(self.home, self.ec.exp_id)
@property
- def node_dir(self):
- node_dir = "node-%d" % self.guid
- return os.path.join(self.exp_dir, node_dir)
+ def node_home(self):
+ """ The "node-<guid>" entry joined under self.exp_home """
+ node_home = "node-%d" % self.guid
+ return os.path.join(self.exp_home, node_home)
@property
def os(self):
def provision(self, filters = None):
+ """ Checks that the node answers, runs the optional clean-up steps,
+ calls self.mkdir on self.node_home and then delegates to the parent
+ class provision().
+ Raises RuntimeError when is_alive() returns False.
+ NOTE(review): 'filters' is not used in the visible part of this method.
+ """
if not self.is_alive():
self._state = ResourceState.FAILED
- self.error("Deploy failed. Unresponsive node")
- return
+ msg = "Deploy failed. Unresponsive node %s" % self.get("hostname")
+ self.error(msg)
+ # Call form of raise: same behaviour on Python 2, also valid on Python 3
+ raise RuntimeError(msg)
if self.get("cleanProcesses"):
self.clean_processes()
if self.get("cleanHome"):
self.clean_home()
- self.mkdir(self.node_dir)
+ self.mkdir(self.node_home)
super(LinuxNode, self).provision()
ifaces = self.get_connected(LinuxInterface.rtype())
for iface in ifaces:
if iface.state < ResourceState.READY:
- self.ec.schedule(DELAY, self.deploy)
+ self.ec.schedule(reschedule_delay, self.deploy)
return
super(LinuxNode, self).deploy()
"sudo -S killall -u %s || /bin/true ; " % self.get("username") +
"sudo -S killall -u %s || /bin/true ; " % self.get("username"))
-
out = err = ""
(out, err), proc = self.execute(cmd, retry = 1, with_lock = True)
def clean_home(self):
self.info("Cleaning up home")
-
- cmd = ("cd %s ; " % self.home +
- "find . -maxdepth 1 \( -name '.cache' -o -name '.local' -o -name '.config' -o -name 'nepi-*' \)"+
- " -execdir rm -rf {} + ")
+
+ cmd = (
+ # "find . -maxdepth 1 \( -name '.cache' -o -name '.local' -o -name '.config' -o -name 'nepi-*' \)" +
+ "find . -maxdepth 1 -name 'nepi-*' " +
+ " -execdir rm -rf {} + "
+ )
+
+ if self.home:
+ cmd = "cd %s ; " % self.home + cmd
out = err = ""
(out, err), proc = self.execute(cmd, with_lock = True)
return self.copy(src, dst)
def install_packages(self, packages, home = None):
- home = home or self.node_dir
+ home = home or self.node_home
cmd = ""
if self.os in ["f12", "f14"]:
out = err = ""
(out, err), proc = self.run_and_wait(cmd, home,
pidfile = "instpkg_pid",
- stdout = "instpkg_log",
- stderr = "instpkg_err",
+ stdout = "instpkg_out",
+ stderr = "instpkg_err",
raise_on_error = True)
return (out, err), proc
def remove_packages(self, packages, home = None):
- home = home or self.node_dir
+ home = home or self.node_home
cmd = ""
if self.os in ["f12", "f14"]:
out = err = ""
(out, err), proc = self.run_and_wait(cmd, home,
pidfile = "rmpkg_pid",
- stdout = "rmpkg_log",
- stderr = "rmpkg_err",
+ stdout = "rmpkg_out",
+ stderr = "rmpkg_err",
raise_on_error = True)
return (out, err), proc
stdout = 'stdout',
stderr = 'stderr',
sudo = False,
+ tty = False,
raise_on_error = False):
""" runs a command in background on the remote host, but waits
until the command finishes execution.
stdin = stdin,
stdout = stdout,
stderr = stderr,
- sudo = sudo)
+ sudo = sudo,
+ tty = tty)
# check no errors occurred
if proc.poll() and err:
def check_output(self, home, filename):
""" Runs "cat" on os.path.join(home, filename) through self.execute
and returns the resulting ((out, err), proc) """
(out, err), proc = self.execute("cat %s" %
- os.path.join(home, filename), with_lock = True)
+ os.path.join(home, filename), retry = 1, with_lock = True)
return (out, err), proc
def is_alive(self):
+ """ Runs an echo command through self.execute and returns True when
+ its output starts with 'ALIVE'. Calls self.error and returns False
+ when the call raises or the output does not match """
out = err = ""
try:
- (out, err), proc = self.execute("echo 'ALIVE'", with_lock = True)
+ # TODO: FIX NOT ALIVE!!!!
+ # NOTE(review): the "|| (echo 'NOTALIVE') >&2" part only runs when "echo 'ALIVE'" itself fails
+ (out, err), proc = self.execute("echo 'ALIVE' || (echo 'NOTALIVE') >&2", retry = 5,
+ with_lock = True)
except:
import traceback
trace = traceback.format_exc()
- msg = "Unresponsive host "
- self.warn(msg, out, trace)
+ msg = "Unresponsive host %s " % err
+ self.error(msg, out, trace)
return False
if out.strip().startswith('ALIVE'):
return True
else:
msg = "Unresponsive host "
- self.warn(msg, out, err)
+ self.error(msg, out, err)
return False
- # TODO!
- #if self.check_bad_host(out,err):
- # self.blacklist()
-
def copy(self, src, dst):
if self.localhost:
(out, err), proc = execfuncs.lcopy(source, dest,
- recursive = True)
+ recursive = True,
+ strict_host_checking = False)
else:
with self._lock:
(out, err), proc = sshfuncs.rcopy(
port = self.get("port"),
identity = self.get("identity"),
server_key = self.get("serverKey"),
- recursive = True)
+ recursive = True,
+ strict_host_checking = False)
return (out, err), proc
retry = 3,
err_on_timeout = True,
connect_timeout = 30,
+ strict_host_checking = False,
persistent = True,
with_lock = False
):
retry = retry,
err_on_timeout = err_on_timeout,
connect_timeout = connect_timeout,
- persistent = persistent
+ persistent = persistent,
+ strict_host_checking = strict_host_checking
)
else:
(out, err), proc = sshfuncs.rexec(
def run(self, command,
home = None,
- create_home = True,
+ create_home = False,
pidfile = "pid",
stdin = None,
stdout = 'stdout',
stderr = 'stderr',
- sudo = False):
+ sudo = False,
+ tty = False):
- self.debug("Running %s" % command)
+ self.debug("Running command '%s'" % command)
if self.localhost:
(out, err), proc = execfuncs.lspawn(command, pidfile,
port = self.get("port"),
agent = True,
identity = self.get("identity"),
- server_key = self.get("serverKey")
+ server_key = self.get("serverKey"),
+ tty = tty
)
return (out, err), proc