From 471aabba05bfca8587cb2b4d0ab21a1b896d145d Mon Sep 17 00:00:00 2001
From: tbaudier
Date: Wed, 25 Jul 2018 11:13:20 +0200
Subject: [PATCH] Add statistical tool to compute speed up for gate jobs

---
 cluster_tools/CMakeLists.txt         |   1 +
 cluster_tools/computeEnlapsedTime.py | 158 +++++++++++++++++++++++++++
 cluster_tools/gate_power_merge.sh    |   5 ++
 3 files changed, 164 insertions(+)
 create mode 100644 cluster_tools/computeEnlapsedTime.py

diff --git a/cluster_tools/CMakeLists.txt b/cluster_tools/CMakeLists.txt
index e208df4..13a6817 100644
--- a/cluster_tools/CMakeLists.txt
+++ b/cluster_tools/CMakeLists.txt
@@ -16,6 +16,7 @@ if(CLITK_BUILD_TOOLS)
     gate_run_submit_cluster_nomove.sh
     gate_job_cluster_nomove.job
     check_tmp_cluster.sh
+    computeEnlapsedTime.py
     )
 
   install(FILES ${SCRIPTS} DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
diff --git a/cluster_tools/computeEnlapsedTime.py b/cluster_tools/computeEnlapsedTime.py
new file mode 100644
index 0000000..93ad1ac
--- /dev/null
+++ b/cluster_tools/computeEnlapsedTime.py
@@ -0,0 +1,158 @@
+import sys
+import os
+import datetime
+import numpy
+
+
+#English three-letter month abbreviations and their month number
+MONTHS = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
+          "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}
+
+
+def usage():
+    """Print the command line help."""
+    print("Compute the enlapsed time for simulation with submitted, started and ended time")
+    print("in ParentFolderName and its subfolder and starting by FileName.")
+    print("The output is in statJobs.txt.")
+    print("Usage:")
+    print("python computeEnlapsedTime.py ParentFolderName FileName")
+    print("eg: python computeEnlapsedTime.py spect_phs_build gate")
+    return
+
+def is_number(s):
+    """Return True if s can be converted to an integer, False otherwise."""
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+def createDate(s):
+    """Build a datetime from the date elements found in the line s.
+
+    The line is split on whitespace and every word is examined: an integer
+    above 2000 is the year, an integer from 1 to 31 is the day, an English
+    three-letter abbreviation is the month and a word with two ':' is the
+    hour:minute:second. The elements that are not found keep their value
+    from 2000-01-01 00:00:00 and the other words are ignored.
+    """
+    #Split on any whitespace so the end of line is not kept in the last word
+    words = s.split()
+    submittedDate = datetime.datetime(year=2000, month=1, day=1, hour=0, minute=0, second=0)
+    for word in words:
+        if is_number(word):
+            number = int(word)
+            #Find the year
+            if 2000 < number <= datetime.MAXYEAR:
+                submittedDate = submittedDate.replace(year=number)
+            #Find the day, 0 (eg. a "+0000" time zone) and negative numbers are not a day
+            elif 0 < number < 32:
+                submittedDate = submittedDate.replace(day=number)
+        #Find the month
+        elif word in MONTHS:
+            submittedDate = submittedDate.replace(month=MONTHS[word])
+        #Find the hour:minute:second
+        elif word.count(':') == 2:
+            try:
+                hour, minute, second = [int(time) for time in word.split(':')]
+                submittedDate = submittedDate.replace(hour=hour, minute=minute, second=second)
+            except ValueError:
+                #The word contains two ':' but is not a valid time, ignore it
+                pass
+    return submittedDate
+
+def readJobTimes(filePath):
+    """Return the submitted, started and ended dates read in the file filePath.
+
+    The result is made of three lists with one date for each line containing
+    "Submitted", "Started" and "Ended" respectively.
+    """
+    submittedTime = []
+    startedTime = []
+    endedTime = []
+    #The with statement closes the file, even if a line cannot be read
+    with open(filePath) as jobFile:
+        for line in jobFile:
+            if "Submitted" in line:
+                submittedTime += [createDate(line)]
+            if "Started" in line:
+                startedTime += [createDate(line)]
+            if "Ended" in line:
+                endedTime += [createDate(line)]
+    return submittedTime, startedTime, endedTime
+
+def computeEnlapsedTime():
+    """Write the time statistics of the jobs in statJobs.txt.
+
+    The folder sys.argv[1] and its subfolders are explored to read the files
+    starting with sys.argv[2]. The computation time is ended - started, the
+    waiting time is ended - submitted and the efficiency is the percentage of
+    the waiting time spent in computation.
+    """
+    #Get the folder name and the filename
+    folderName = sys.argv[1]
+    fileName = sys.argv[2]
+
+    #Look for all files starting with the fileName
+    filePaths = []
+    for root, dirs, files in os.walk(folderName):
+        for file in files:
+            if file.startswith(fileName):
+                filePaths.append(os.path.join(root, file))
+
+    #For each file into filePaths, save the submitted, started and ended time
+    submittedTime = []
+    startedTime = []
+    endedTime = []
+    for filePath in filePaths:
+        submitted, started, ended = readJobTimes(filePath)
+        #The lists are zipped below: a file with a missing time (eg. a job not
+        #ended) would pair the times of different jobs for all the next files
+        if not len(submitted) == len(started) == len(ended):
+            sys.stderr.write("Skip " + filePath + ", a submitted, started or ended time is missing\n")
+            continue
+        submittedTime += submitted
+        startedTime += started
+        endedTime += ended
+
+    #Computation time
+    computationTime = [a - b for a, b in zip(endedTime, startedTime)]
+    computationTimeSecond = numpy.array([time.total_seconds() for time in computationTime])
+    waitingTime = [a - b for a, b in zip(endedTime, submittedTime)]
+    waitingTimeSecond = numpy.array([time.total_seconds() for time in waitingTime])
+    computePart = computationTimeSecond / waitingTimeSecond * 100.0
+
+    #Write the output
+    with open('statJobs.txt', 'w') as output:
+        output.write(str(len(filePaths)) + " run jobs\n\n")
+        #min, max and the statistics need at least one job with its three times
+        if not computationTime:
+            output.write("No submitted, started and ended time found\n")
+            return
+        output.write("Starting submission at " + str(min(submittedTime)) + "\n")
+        output.write("Finishing submission at " + str(max(submittedTime)) + "\n\n")
+        output.write("First job started computation at " + str(min(startedTime)) + "\n")
+        output.write("Last job started computation at " + str(max(startedTime)) + "\n\n")
+        output.write("First job ended computation at " + str(min(endedTime)) + "\n")
+        output.write("Last job ended computation at " + str(max(endedTime)) + "\n\n")
+        output.write("Min computation time " + str(min(computationTime)) + "\n")
+        output.write("Max computation time " + str(max(computationTime)) + "\n")
+        output.write("Mean computation time " + str(datetime.timedelta(seconds=numpy.mean(computationTimeSecond))) + "\n")
+        output.write("Std computation time " + str(datetime.timedelta(seconds=numpy.std(computationTimeSecond))) + "\n\n")
+        output.write("Min waiting time " + str(min(waitingTime)) + "\n")
+        output.write("Max waiting time " + str(max(waitingTime)) + "\n")
+        output.write("Mean waiting time " + str(datetime.timedelta(seconds=numpy.mean(waitingTimeSecond))) + "\n")
+        output.write("Std waiting " + str(datetime.timedelta(seconds=numpy.std(waitingTimeSecond))) + "\n\n")
+        output.write("Min efficiency " + str(min(computePart)) + " %\n")
+        output.write("Max efficiency " + str(max(computePart)) + " %\n")
+        output.write("Mean efficiency " + str(numpy.mean(computePart)) + " %\n")
+        output.write("Std efficiency " + str(numpy.std(computePart)) + " %\n\n")
+        output.write("Speed up " + str(numpy.mean(computationTimeSecond)*len(filePaths)/max(waitingTimeSecond)) + "\n\n")
+
+    return
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        usage()
+    else:
+        computeEnlapsedTime()
diff --git a/cluster_tools/gate_power_merge.sh b/cluster_tools/gate_power_merge.sh
index 767fb2f..8fa4cfc 100755
--- a/cluster_tools/gate_power_merge.sh
+++ b/cluster_tools/gate_power_merge.sh
@@ -595,6 +595,11 @@ do
     merge_dispatcher_uncertainty "${outputfile}" "${force}"
 done
 
+echo "compute job statistics"
+python computeEnlapsedTime.py "${rundir}" gate
+mv "statJobs.txt" "${outputdir}/statJobs.txt"
+
+
 if [ -f "${rundir}/params.txt" ]
 then
     echo "copying params file"
-- 
2.47.1