# ICE Revision: $Id$
"""Encapsulates all necessary things for a cluster-job, like setting
up, running, restarting"""
import os,sys,subprocess
from os import path,unlink
from threading import Thread,Lock,Timer
from PyFoam.Applications.Decomposer import Decomposer
from PyFoam.Applications.Runner import Runner
from PyFoam.Applications.SteadyRunner import SteadyRunner
from PyFoam.Applications.CloneCase import CloneCase
from PyFoam.Applications.FromTemplate import FromTemplate
from PyFoam.Applications.PrepareCase import PrepareCase
from PyFoam.Applications.RunParameterVariation import RunParameterVariation
from PyFoam.FoamInformation import changeFoamVersion
from PyFoam.FoamInformation import foamVersion as getFoamVersion
from PyFoam.Error import error,warning
from PyFoam import configuration as config
from PyFoam.FoamInformation import oldAppConvention as oldApp
from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
from PyFoam.ThirdParty.six import print_,iteritems
class ClusterJob(object):
    """All cluster-jobs are to be derived from this base-class.

    The actual jobs are implemented by overriding methods.

    There is a number of variables in this class that are used to
    'communicate' information between the various stages
    (``restarted``, ``isDecomposed``, ``ordinaryEnd``, ...).

    NOTE(review): this class only works when running under SGE
    (Sun/Univa Grid Engine) — it reads ``JOB_ID``, ``JOB_NAME``,
    ``RESTARTED``, ``NSLOTS`` and ``SGE_TASK_ID`` from the environment.
    """

    def __init__(self,
                 basename,
                 arrayJob=False,
                 hardRestart=False,
                 autoParallel=True,
                 doAutoReconstruct=None,
                 foamVersion=None,
                 compileOption=None,
                 useFoamMPI=False,
                 multiRegion=False,
                 parameters={},
                 isDecomposed=False):
        """Initializes the Job

        :param basename: Basis name of the job
        :param arrayJob: this job is a parameter variation. The tasks
            are identified by their task-id (``SGE_TASK_ID``)
        :param hardRestart: treat the job as restarted even if SGE does
            not report it as such
        :param autoParallel: Parallelization (decompose/reconstruct) is
            handled by the base-class
        :param doAutoReconstruct: Automatically reconstruct the case if
            autoParallel is set. If the value is None then it is looked up
            from the configuration
        :param foamVersion: The foam-Version that is to be used. If None
            the version is taken from the configuration
        :param compileOption: Forces compile-option (usually 'Opt' or 'Debug')
        :param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
        :param multiRegion: This job consists of multiple regions
        :param parameters: Dictionary with parameters that are being passed
            to the Runner.
            NOTE(review): a mutable default argument — ``doIt()`` writes into
            this dict, so the default instance is shared between jobs that do
            not pass their own dict
        :param isDecomposed: Assume that the job is already decomposed"""
        # print_(os.environ)

        # Fail early if we are not running inside an SGE job
        if not "JOB_ID" in os.environ:
            error("Not an SGE-job. Environment variable JOB_ID is missing")
        self.jobID=int(os.environ["JOB_ID"])
        self.jobName=os.environ["JOB_NAME"]

        # Absolute path of the case (or case-prefix for array jobs)
        self.basename=path.join(path.abspath(path.curdir),basename)

        # SGE sets RESTARTED to a non-zero value if it restarted the job
        sgeRestarted=False
        if "RESTARTED" in os.environ:
            sgeRestarted=(int(os.environ["RESTARTED"])!=0)

        if sgeRestarted or hardRestart:
            self.restarted=True
        else:
            self.restarted=False

        if foamVersion==None:
            foamVersion=config().get("OpenFOAM","Version")

        changeFoamVersion(foamVersion,compileOption=compileOption)

        if not "WM_PROJECT_VERSION" in os.environ:
            error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

        self.autoParallel=autoParallel

        self.doAutoReconstruct=doAutoReconstruct
        if self.doAutoReconstruct==None:
            self.doAutoReconstruct=config().getboolean("ClusterJob","doAutoReconstruct")

        self.multiRegion=multiRegion

        self.parameters=parameters

        self.hostfile=None
        self.nproc=1

        # NSLOTS is set by SGE to the number of slots granted to this job
        if "NSLOTS" in os.environ:
            self.nproc=int(os.environ["NSLOTS"])
            self.message("Running on",self.nproc,"CPUs")
            if self.nproc>1:
                # self.hostfile=os.environ["PE_HOSTFILE"]
                # presumably a tight-integration PE writes $TMP/machines
                # — TODO confirm for the local cluster setup
                self.hostfile=path.join(os.environ["TMP"],"machines")
                if config().getboolean("ClusterJob","useMachineFile"):
                    self.message("Using the machinefile",self.hostfile)
                    self.message("Contents of the machinefile:",open(self.hostfile).readlines())
                else:
                    self.message("No machinefile used because switched off with 'useMachineFile'")

        self.ordinaryEnd=True
        self.listenToTimer=False

        self.taskID=None
        self.arrayJob=arrayJob

        if self.arrayJob:
            self.taskID=int(os.environ["SGE_TASK_ID"])

        # Unless the OpenFOAM-supplied MPI should be used, prepend the
        # cluster-specific PATH/LD_LIBRARY_PATH from the configuration
        if not useFoamMPI and not foamVersion in eval(config().get("ClusterJob","useFoamMPI",default='[]')):
            ## prepend special paths for the cluster
            self.message("Adding Cluster-specific paths")
            os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
            os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

        self.isDecomposed=isDecomposed

    def fullJobId(self):
        """Return a string with the full job-ID (``jobID`` or
        ``jobID:taskID`` for array jobs)"""
        result=str(self.jobID)
        if self.arrayJob:
            result+=":"+str(self.taskID)
        return result

    def message(self,*txt):
        """Print a prefixed status message and flush stdout so that the
        cluster log stays in order"""
        print_("=== CLUSTERJOB: ",end="")
        for t in txt:
            print_(t,end=" ")
        print_(" ===")
        sys.stdout.flush()

    def setState(self,txt):
        """Write the current job state to the ``ClusterJobState`` file
        in the case directory
        :param txt: the state description"""
        self.message("Setting Job state to",txt)
        fName=path.join(self.casedir(),"ClusterJobState")
        f=open(fName,"w")
        f.write(txt+"\n")
        f.close()

    def jobFile(self):
        """The file with the job information. Its existence signals a
        running job; derived marker files (checkpoint/stop) are based
        on this name"""
        jobfile="%s.%d" % (self.jobName,self.jobID)
        if self.arrayJob:
            jobfile+=".%d" % self.taskID
        jobfile+=".pyFoam.clusterjob"
        jobfile=path.join(path.dirname(self.basename),jobfile)

        return jobfile

    def checkpointFile(self):
        """The file that makes the job write a checkpoint"""
        return self.jobFile()+".checkpoint"

    def stopFile(self):
        """The file that makes the job write a checkpoint and end"""
        return self.jobFile()+".stop"

    def doIt(self):
        """The central logic. Runs the job, sets it up etc.

        Drives the whole life-cycle: setup (unless restarted), optional
        decomposition, the actual run (while polling for checkpoint/stop
        requests via a Timer), reconstruction and cleanup. The job state
        is recorded via setState() at every stage."""
        # Create the marker file that identifies this running job
        f=open(self.jobFile(),"w")
        f.write(path.basename(self.basename)+"\n")
        f.close()

        self.message()
        self.message("Running on directory",self.casename())
        self.message()
        self.setState("Starting up")

        # Merge task-specific parameters for array jobs
        if self.arrayJob:
            for k,v in list(self.taskParameters(self.taskID).items()):
                self.parameters[k]=v

        self.parameters.update(self.additionalParameters())

        self.message("Parameters:",self.parameters)
        if not self.restarted:
            self.setState("Setting up")
            self.setup(self.parameters)
            if self.autoParallel and self.nproc>1 and not self.isDecomposed:
                self.setState("Decomposing")
                self.autoDecompose()

            self.isDecomposed=True

            self.setState("Setting up 2")
            self.postDecomposeSetup(self.parameters)
        else:
            self.setState("Restarting")
            self.isDecomposed=True

        self.setState("Running")
        self.listenToTimer=True
        # checkForMessageFromAbove is a module-level helper (defined
        # outside this excerpt) that polls for checkpoint/stop files
        self.timer=Timer(1.,checkForMessageFromAbove,args=[self])
        self.timer.start()

        self.run(self.parameters)
        self.listenToTimer=False

        if path.exists(self.jobFile()):
            unlink(self.jobFile())

        # ordinaryEnd is cleared by stopJob() when the run was aborted
        if self.ordinaryEnd:
            self.setState("Post Running")
            self.preReconstructCleanup(self.parameters)

            if self.autoParallel and self.nproc>1:
                self.setState("Reconstructing")
                self.autoReconstruct()

            if self.nproc>0:
                self.additionalReconstruct(self.parameters)

            self.setState("Cleaning")
            self.cleanup(self.parameters)
            self.setState("Finished")
        else:
            self.setState("Suspended")

        # Remove any leftover control files
        if path.exists(self.stopFile()):
            unlink(self.stopFile())
        if path.exists(self.checkpointFile()):
            unlink(self.checkpointFile())

    def casedir(self):
        """Returns the actual directory of the case

        To be overridden if appropriate. For array jobs the task-ID is
        appended to the basename"""
        if self.arrayJob:
            return "%s.%05d" % (self.basename,self.taskID)
        else:
            return self.basename

    def casename(self):
        """Returns just the name of the case"""
        return path.basename(self.casedir())

    def execute(self,cmd):
        """Execute a shell command in the case directory. No checking done

        :param cmd: the command as a string"""
        oldDir=os.getcwd()
        self.message("Changing directory to",self.casedir())
        os.chdir(self.casedir())
        self.message("Executing",cmd)
        try:
            retcode = subprocess.call(cmd,shell=True)
            if retcode < 0:
                self.message(cmd,"was terminated by signal", -retcode)
            else:
                self.message(cmd,"returned", retcode)
        except OSError:
            e = sys.exc_info()[1] # Needed because python 2.5 does not support 'as e'
            self.message(cmd,"Execution failed:", e)
        self.message("Executiong of",cmd,"ended")
        self.message("Changing directory back to",oldDir)
        os.chdir(oldDir)

    def templateFile(self,fileName):
        """Looks for a template file and evaluates the template using
        the usual parameters

        :param fileName: the name of the file that will be
            constructed. The template file is the same plus the
            extension '.template'"""
        self.message("Building file",fileName,"from template with parameters",
                     self.parameters)
        argList=["--output-file=%s" % path.join(self.casedir(),fileName),
                 "--dump-used-values"
                 ]
        # The FromTemplate application writes the file as a side effect
        tmpl=FromTemplate(args=argList,
                          parameters=self.parameters)

    def foamRun(self,application,
                args=[],
                foamArgs=[],
                steady=False,
                multiRegion=True,
                progress=False,
                compress=False,
                noLog=False):
        """Runs a foam utility on the case.

        If it is a parallel job and the grid has
        already been decomposed (and not yet reconstructed) it is run in
        parallel

        :param application: the Foam-Application that is to be run
        :param foamArgs: A list if with the additional arguments for the
            Foam-Application
        :param compress: Compress the log-file
        :param args: A list with additional arguments for the Runner-object
        :param steady: Use the steady-runner
        :param multiRegion: Run this on multiple regions (if None: I don't
            have an opinion on this)
        :param progress: Only output the time and nothing else
        :param noLog: Do not generate a logfile"""
        arglist=args[:]
        arglist+=["--job-id=%s" % self.fullJobId()]
        for k,v in iteritems(self.parameters):
            arglist+=["--parameter=%s:%s" % (str(k),str(v))]

        # Only run in parallel between decompose and reconstruct
        if self.isDecomposed and self.nproc>1:
            arglist+=["--procnr=%d" % self.nproc]
            if config().getboolean("ClusterJob","useMachineFile"):
                arglist+=["--machinefile=%s" % self.hostfile]

        arglist+=["--echo-command-prefix='=== Executing'"]

        if progress:
            arglist+=["--progress"]
        if noLog:
            arglist+=["--no-log"]
        if compress:
            arglist+=["--compress"]

        if self.multiRegion:
            if multiRegion:
                arglist+=["--all-regions"]
        elif multiRegion:
            warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

        if self.restarted:
            arglist+=["--restart"]

        arglist+=[application]
        # Old OpenFOAM applications take <root> <case> instead of -case
        if oldApp():
            arglist+=[".",self.casename()]
        else:
            arglist+=["-case",self.casename()]

        arglist+=foamArgs

        self.message("Executing",arglist)

        if steady:
            self.message("Running Steady")
            runner=SteadyRunner(args=arglist)
        else:
            runner=Runner(args=arglist)

    def autoDecompose(self):
        """Automatically decomposes the grid with a metis-algorithm
        (scotch for OpenFOAM >= 1.6)"""
        if path.isdir(path.join(self.casedir(),"processor0")):
            warning("A processor directory already exists. There might be a problem")

        defaultMethod="metis"

        # metis was dropped as the default in favour of scotch in 1.6
        if getFoamVersion()>=(1,6):
            defaultMethod="scotch"

        args=["--method="+defaultMethod,
              "--clear",
              self.casename(),
              self.nproc,
              "--job-id=%s" % self.fullJobId()]

        if self.multiRegion:
            args.append("--all-regions")

        deco=Decomposer(args=args)

    def autoReconstruct(self):
        """Default reconstruction of a parallel run"""
        if self.doAutoReconstruct:
            self.isDecomposed=False

            self.foamRun("reconstructPar",
                         args=["--logname=ReconstructPar"])
        else:
            self.message("No reconstruction (because asked to)")

    def setup(self,parameters):
        """Set up the job. Called in the beginning if the
        job has not been restarted

        Usual tasks include grid conversion/setup, mesh decomposition etc

        :param parameters: a dictionary with parameters"""
        pass

    def postDecomposeSetup(self,parameters):
        """Additional setup, to be executed when the grid is already decomposed

        Usually for tasks that can be done on a decomposed grid

        :param parameters: a dictionary with parameters"""
        pass

    def run(self,parameters):
        """Run the actual job. Usually the solver.
        :param parameters: a dictionary with parameters"""
        pass

    def preReconstructCleanup(self,parameters):
        """Additional cleanup, to be executed when the grid is still decomposed

        Usually for tasks that can be done on a decomposed grid

        :param parameters: a dictionary with parameters"""
        pass

    def cleanup(self,parameters):
        """Clean up after a job
        :param parameters: a dictionary with parameters"""
        pass

    def additionalReconstruct(self,parameters):
        """Additional reconstruction of parallel runs (Stuff that the
        OpenFOAM-reconstructPar doesn't do

        :param parameters: a dictionary with parameters"""
        pass

    def taskParameters(self,id):
        """Parameters for a specific task. Must be overridden by
        parameterized (array) jobs

        :param id: the id of the task
        :return: a dictionary with parameters for this task"""
        error("taskParameter not implemented. Not a parameterized job")
        return {}

    def additionalParameters(self):
        """Additional parameters

        :return: a dictionary with parameters for this task"""
        warning("Method 'additionalParameters' not implemented. Not a problem. Just saying")
        return {}

    def writeCheckpoint(self):
        """Ask the running solver to write a checkpoint (via a 'write'
        file in the case directory) and re-arm the polling timer"""
        if self.listenToTimer:
            f=open(path.join(self.basename,"write"),"w")
            f.write("Jetzt will ich's wissen")
            f.close()
            unlink(self.checkpointFile())
        else:
            warning("I'm not listening to your callbacks")

        # Re-arm the timer so that further control files are noticed.
        # NOTE(review): the timer does not seem to be started here —
        # presumably checkForMessageFromAbove does that; confirm
        self.timer=Timer(1.,checkForMessageFromAbove,args=[self])

    def stopJob(self):
        """Ask the running solver to write and stop (via a 'stop' file
        in the case directory); marks the job end as not ordinary"""
        if self.listenToTimer:
            self.ordinaryEnd=False
            f=open(path.join(self.basename,"stop"),"w")
            f.write("Geh z'haus")
            f.close()
            unlink(self.stopFile())
        else:
            warning("I'm not listening to your callbacks")
class SolverJob(ClusterJob):
    """A Cluster-Job that executes a solver. It implements the run-function.

    If a template-case is specified, the case is copied from it before
    the run (unless the job is a restart)."""

    def __init__(self,basename,solver,
                 template=None,
                 cloneParameters=None,
                 arrayJob=False,
                 hardRestart=False,
                 autoParallel=True,
                 doAutoReconstruct=None,
                 foamVersion=None,
                 compileOption=None,
                 useFoamMPI=False,
                 steady=False,
                 multiRegion=False,
                 parameters=None,
                 progress=False,
                 solverArgs=None,
                 solverProgress=False,
                 solverNoLog=False,
                 solverLogCompress=False,
                 isDecomposed=False):
        """:param solver: name of the Foam-solver to run
        :param template: Name of the template-case. It is assumed that
            it resides in the same directory as the actual case
        :param cloneParameters: a list with additional parameters for the
            CloneCase-object that copies the template
        :param steady: use the steady-runner for the solver
        :param solverArgs: additional arguments for the solver application
        :param solverProgress: Only writes the current time of the solver
        :param solverNoLog: do not write a log-file for the solver
        :param solverLogCompress: compress the solver log-file"""
        # BUGFIX: the defaults used to be mutable ([], {}) and
        # cloneParameters was extended in place with '+=', so the
        # '--parallel' flag leaked into the shared default list (and any
        # caller-supplied list) across instances. Normalize None here and
        # never mutate the caller's list.
        if cloneParameters is None:
            cloneParameters=[]
        if parameters is None:
            parameters={}
        if solverArgs is None:
            solverArgs=[]

        ClusterJob.__init__(self,basename,
                            arrayJob=arrayJob,
                            hardRestart=hardRestart,
                            autoParallel=autoParallel,
                            doAutoReconstruct=doAutoReconstruct,
                            foamVersion=foamVersion,
                            compileOption=compileOption,
                            useFoamMPI=useFoamMPI,
                            multiRegion=multiRegion,
                            parameters=parameters,
                            isDecomposed=isDecomposed)
        self.solver=solver
        self.steady=steady

        # Clone from the template unless this is a restart (the case then
        # already exists)
        if template is not None and not self.restarted:
            template=path.join(path.dirname(self.casedir()),template)
            if path.abspath(basename)==path.abspath(template):
                error("The basename",basename,"and the template",template,"are the same directory")
            if isDecomposed:
                # copy the processor directories as well
                cloneParameters=cloneParameters+["--parallel"]
            self.message("Cloning from template",template)
            clone=CloneCase(
                args=cloneParameters+[template,self.casedir(),"--follow-symlinks"])

        self.solverProgress=solverProgress
        self.solverNoLog=solverNoLog
        self.solverLogCompress=solverLogCompress
        self.solverArgs=solverArgs

    def run(self,parameters):
        """Run the configured solver on the case.
        :param parameters: a dictionary with parameters (unused here;
            the solver options were fixed at construction time)"""
        self.foamRun(self.solver,
                     steady=self.steady,
                     foamArgs=self.solverArgs,
                     multiRegion=False,
                     progress=self.solverProgress,
                     noLog=self.solverNoLog,
                     compress=self.solverLogCompress)
class PrepareCaseJob(SolverJob):
    """Assumes that the case is prepared to be set up with
    =pyFoamPrepareCase.py= and automatically sets it up with
    this. Needs one parameterfile to be specified and then a list of
    name/value-pairs (or a dict)
    """

    def __init__(self,basename,solver,
                 parameterfile,
                 arguments,
                 parameters=None,
                 noMeshCreate=False,
                 **kwargs):
        """:param parameterfile: name of the pyFoamPrepareCase parameter
            file (relative to the case directory)
        :param arguments: either a flat list of alternating name/value
            strings (values are coerced to bool/int/float where possible)
            or a dictionary of already-typed values
        :param parameters: additional parameters passed on to SolverJob
        :param noMeshCreate: pass '--no-mesh-create' to PrepareCase"""
        self.__parameterfile=parameterfile
        self.__noMeshCreate=noMeshCreate

        para={}
        if isinstance(arguments,list):
            if len(arguments) % 2 !=0:
                error("Length of arguments should be an even number. Is",len(arguments),
                      ":",arguments)

            # make all string arguments that could be boolean boolean values
            from PyFoam.Basics.DataStructures import BoolProxy
            for k,v in dict(zip(arguments[::2],arguments[1::2])).items():
                # coercion cascade: bool -> int -> float -> eval -> quoted string
                try:
                    try:
                        para[k]=BoolProxy(textual=v).val
                    except TypeError:
                        para[k]=int(v)
                except ValueError:
                    try:
                        para[k]=float(v)
                    except ValueError:
                        try:
                            # SECURITY NOTE: eval() on a job argument executes
                            # arbitrary expressions; acceptable only because the
                            # arguments come from the job submitter themself
                            para[k]=eval(v)
                        except (SyntaxError,NameError):
                            para[k]="'"+v+"'"
        elif isinstance(arguments,dict):
            para=arguments
        else:
            error("Type of arguments is ",type(arguments),"Should be 'dict' or 'list':",arguments)

        self.__parametervalues=para

        # BUGFIX: 'parameters' used to default to a mutable {} that was
        # updated in place, leaking values between instances. Work on a
        # private copy instead of mutating the caller's dict.
        parameters={} if parameters is None else dict(parameters)
        parameters.update(self.__parametervalues)

        print_("Parameter file",self.__parameterfile)
        print_("Parameter values",self.__parametervalues)

        SolverJob.__init__(self,basename,solver,
                           parameters=parameters,
                           **kwargs)

    def setup(self,parameters):
        """Run pyFoamPrepareCase on the case with the collected
        parameter values.
        :param parameters: a dictionary with parameters"""
        # Render the parameters as a Python-dict literal for --values
        parameterString=",".join(["'%s':%s"%i for i in parameters.items()])

        PrepareCase(args=[self.casedir(),
                          "--allow-exec",
                          "--parameter="+path.join(self.casedir(),self.__parameterfile),
                          "--number-of-processors=%d" % self.nproc,
                          "--values={"+parameterString+"}"]+
                         (["--no-mesh-create"] if self.__noMeshCreate else []))
class VariationCaseJob(SolverJob):
    """Assumes that the case is prepared to be set up with
    =pyFoamRunParameterVariation.py= and automatically sets it up with
    this. Needs one parameterfile and a variation-file
    """

    def __init__(self,basename,
                 parameterfile,
                 variationfile,
                 template=None,
                 **kwargs):
        """:param parameterfile: name of the parameter file (relative to
            the template/case directory)
        :param variationfile: name of the variation file (relative to
            the template/case directory)
        :param template: the template case the variations are based on"""
        self.__parameterfile=parameterfile
        self.__variationfile=variationfile

        print_("Parameter file",self.__parameterfile)
        print_("Variation file",self.__variationfile)

        # Query the list of variations from the template to find out
        # which solver this task should use
        data=RunParameterVariation(args=[template,
                                         path.join(template,self.__variationfile),
                                         "--parameter="+path.join(template,self.__parameterfile),
                                         "--list-variations"]).getData()

        # SGE task-ids are 1-based; the variation list is 0-based
        taskIndex=int(os.environ["SGE_TASK_ID"])-1
        thisVariation=data["variations"][taskIndex]
        solver=thisVariation["solver"] if "solver" in thisVariation else data["fixed"]["solver"]

        SolverJob.__init__(self,basename,solver,
                           arrayJob=True,
                           template=template,
                           **kwargs)

    def taskParameters(self,id):
        """The parameters are injected by RunParameterVariation during
        setup(), so no extra task parameters are needed here.
        :param id: the id of the task
        :return: an empty dictionary"""
        return {}

    def setup(self,parameters):
        """Set up this single variation of the case with
        pyFoamRunParameterVariation (without running the solver).
        :param parameters: a dictionary with parameters"""
        RunParameterVariation(args=[self.casedir(),
                                    path.join(self.casedir(),self.__variationfile),
                                    "--allow-exec",
                                    "--parameter-file="+path.join(self.casedir(),self.__parameterfile),
                                    "--single-variation=%d" % (self.taskID-1),
                                    "--no-execute-solver",
                                    "--auto-create-database",
                                    "--no-database-write",
                                    "--inplace-execution"])
# Should work with Python3 and Python2