Commit 2e6328c1 authored by Dmitry Duplyakin

SLURM: add update call for config

Also add a new call in util.py for reading a file and cleaning up its contents
parent 944cc1cc
import time
import re
import logging
from shutil import copyfile
from elasticslice.managers.core import SimpleElasticSliceHelper, \
SimpleElasticSliceManager
from elasticslice.util.util import ShellCommand
from elasticslice.util.log import configure_file_logging
from shutil import copyfile
import time
from elasticslice.util.util import get_file_contents
SECOND = 1
MINUTE = SECOND * 60
......@@ -64,7 +65,7 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
self.exp_lan = 'link-1'
# Add SLURM related capabilities
self.slurm = SlurmScheduler()
self.slurm = SlurmScheduler(config=self.config)
# File logger for special events
configure_file_logging('file_log', '/tmp/SlurmDynamicManager.log')
......@@ -148,8 +149,10 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
and @status is either 'ready', 'failed'. The return value is
not checked by the caller.
"""
self.file_log.debug("Handling added node: %s; Status: %s" % (node, status))
self.update_etc_hosts()
self.slurm.update_slurm_config(node_list=self.get_nodenames())
pass
def handle_deleted_node(self,node):
......@@ -161,8 +164,10 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
The return value is not checked by the caller.
"""
self.file_log.debug("Handling deleted node: %s" % node)
self.update_etc_hosts()
self.slurm.update_slurm_config(node_list=self.get_nodenames())
pass
def get_nodenames(self, include_IPs=False, include_fullnames=False):
......@@ -184,20 +189,6 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
result.append(line)
return result
def get_etc_hosts(self, cleanup=True):
    """
    Return the contents of the local /etc/hosts file as a list of lines.

    Arguments:
    cleanup -- if True (default), every line is normalized: the EOL character
               is removed, tabs are replaced with spaces, and leading, trailing
               and duplicate spaces are removed. Lines that end up empty are
               dropped. If False, the raw lines (including EOL characters) are
               returned unchanged.

    Returns a real list in both modes, so the result can be indexed, sliced
    and concatenated regardless of the Python version in use.
    """
    with open('/etc/hosts', "r") as f:
        contents = f.readlines()

    if not cleanup:
        return contents

    # str.split() without arguments splits on any run of whitespace and ignores
    # leading/trailing whitespace, so a single split/join pair performs the
    # whole normalization (EOL removal, tab replacement, space squeezing).
    normalized = (' '.join(line.split()) for line in contents)
    # Build a list explicitly: filter() would return a one-shot iterator on
    # Python 3, which supports neither indexing nor repeated traversal.
    return [line for line in normalized if line]
def update_etc_hosts(self):
"""
This method updates /etc/hosts file. It adds a block of entries for all experiment nodes
......@@ -207,9 +198,9 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
"""
MARKER = "# AUTO-GENERATED BLOCK - DO NOT EDIT BY HAND"
END_LINE = "# END OF AUTO-GENERATED BLOCK - DO NOT ADD LINES BELOW"
TMP_FILE = "/etc/.new_hosts"
TMP_FILE = "/tmp/.new_hosts"
contents = self.get_etc_hosts(cleanup=True)
contents = get_file_contents("/etc/hosts", cleanup=True)
new_block = [MARKER] + self.get_nodenames(include_IPs=True, include_fullnames=True) + [END_LINE]
detected_change = False
......@@ -238,8 +229,9 @@ class SlurmDynamicManager(SimpleElasticSliceHelper,SimpleElasticSliceManager):
class SlurmScheduler(object):
"""Define operations for interacting with the SLURM resource manager and scheduler"""
def __init__(self):
def __init__(self, config=None):
LOG.debug("Starting SlurmScheduler object")
self.config = config
# List of displayed fields; all flags are documented at: http://slurm.schedmd.com/squeue.html
self.squeue_format = "%i,%P,%j,%u,%T,%M,%l,%D,%R,%p,%C,%D,%e"
......@@ -325,3 +317,63 @@ class SlurmScheduler(object):
rj = self.list_running_jobs(show_header=False)
LOG.debug("Running jobs: %s" % rj)
return len(pj) + len(rj)
def update_slurm_config(self, node_list=None):
    """
    Update the SLURM configuration in /etc/slurm-llnl/slurm.conf.

    An auto-generated block with a "NodeName=" line and a "PartitionName="
    line covering all current experiment nodes is placed at the end of the
    file, unless an equivalent block is already there. A special marker line
    is used to find an existing block. All lines that precede the block are
    preserved (in the cleaned-up form returned by get_file_contents).

    Before the config is replaced, a backup copy of the old file is saved next
    to it with a "-YYYYmmdd-HHMMSS" suffix.

    Arguments:
    node_list -- list of short (virtual) nodenames to be added in SLURM config;
                 if empty or None, the method does nothing
    """
    if not node_list:
        return

    # The scheduler may have been created without a config object; treat that
    # the same way as a config that lacks the partition name.
    config_values = self.config.all if self.config is not None else {}
    if "SLURM::partition_name" in config_values:
        partition = config_values["SLURM::partition_name"]
    else:
        LOG.debug("Config file is missing 'partition_name' key in the 'SLURM' section. Using: 'all'")
        partition = "all"

    CONF_FILE = "/etc/slurm-llnl/slurm.conf"
    # NOTE(review): fixed file name in a shared directory; consider a securely
    # created temporary file if untrusted local users exist -- TODO confirm.
    TMP_FILE = "/tmp/.slurm.conf"
    MARKER = "# COMPUTE NODES BLOCK - DO NOT EDIT BY HAND"
    END_LINE = "# END OF COMPUTE NODES BLOCK - DO NOT ADD LINES BELOW"

    node_list_comma_separated = ",".join(node_list)
    node_line = "NodeName=%s State=UNKNOWN" % node_list_comma_separated
    partition_line = "PartitionName=%s Nodes=%s Default=YES MaxTime=INFINITE State=UP" % (
        partition, node_list_comma_separated)
    new_block = [MARKER, node_line, partition_line, END_LINE]

    # Materialize the result: membership test, index() and slicing below need a
    # real list, not a one-shot iterator.
    contents = list(get_file_contents(CONF_FILE, cleanup=True))

    if MARKER in contents:
        pos = contents.index(MARKER)
        # Everything from the marker to the end of the file is the old block.
        # The comparison is order-insensitive, as in the original logic.
        if set(contents[pos:]) == set(new_block):
            # The existing block already describes the current nodes.
            return
        new_contents = contents[:pos] + new_block
    else:
        new_contents = contents + new_block

    with open(TMP_FILE, "w") as f:
        # Terminate the last line as well so the result is a well-formed text file.
        f.write("\n".join(new_contents) + "\n")

    # Make a backup copy of the old config
    time_suff = time.strftime("-%Y%m%d-%H%M%S")
    copyfile(CONF_FILE, CONF_FILE + time_suff)
    # Replace the old file with the new one
    copyfile(TMP_FILE, CONF_FILE)
......@@ -361,3 +361,17 @@ class DefaultSubcommandArgumentParser(argparse.ArgumentParser):
arg_strings, *args, **kwargs)
pass
def get_file_contents(file_path, cleanup=True):
    """
    Return the contents of the file at the specified path as a list of lines.

    Arguments:
    file_path -- path of the file to read
    cleanup   -- if True (default), every line is normalized: the EOL character
                 is removed, tabs are replaced with spaces, and leading,
                 trailing and duplicate spaces are removed. Lines that end up
                 empty are dropped. If False, the raw lines (including EOL
                 characters) are returned unchanged.

    Returns a real list in both modes, so callers can use membership tests,
    index(), slicing and concatenation on the result on any Python version.
    """
    with open(file_path, "r") as f:
        contents = f.readlines()

    if not cleanup:
        return contents

    # str.split() without arguments splits on any run of whitespace and ignores
    # leading/trailing whitespace, so a single split/join pair performs the
    # whole normalization (EOL removal, tab replacement, space squeezing).
    normalized = (' '.join(line.split()) for line in contents)
    # Build a list explicitly: filter() would return a one-shot iterator on
    # Python 3, which breaks callers that index or slice the result.
    return [line for line in normalized if line]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment