Creatis software - clitk.git/commitdiff
Add statistical tool to compute speed up for gate jobs
author tbaudier <thomas.baudier@creatis.insa-lyon.fr>
Wed, 25 Jul 2018 09:13:20 +0000 (11:13 +0200)
committer tbaudier <thomas.baudier@creatis.insa-lyon.fr>
Wed, 25 Jul 2018 09:13:20 +0000 (11:13 +0200)
cluster_tools/CMakeLists.txt
cluster_tools/computeEnlapsedTime.py [new file with mode: 0644]
cluster_tools/gate_power_merge.sh

index e208df41b3bb500fd69aeb9bf8adfac3d2dac8c8..13a681792bf52b0d9faf9fa99071f048b313e70b 100644 (file)
@@ -16,6 +16,7 @@ if(CLITK_BUILD_TOOLS)
     gate_run_submit_cluster_nomove.sh
     gate_job_cluster_nomove.job
     check_tmp_cluster.sh
+    computeEnlapsedTime.py
     )
 
   install(FILES ${SCRIPTS} DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
diff --git a/cluster_tools/computeEnlapsedTime.py b/cluster_tools/computeEnlapsedTime.py
new file mode 100644 (file)
index 0000000..93ad1ac
--- /dev/null
@@ -0,0 +1,129 @@
+import sys
+import os
+import datetime
+import numpy
+
+
+def usage():
+    """Print the command-line help for this script on standard output.
+
+    The help describes the two expected positional arguments: the parent
+    folder to scan and the prefix of the file names to read.
+    """
+    print("Compute the enlapsed time for simulation with submitted, started and ended time")
+    print("in ParentFolderName and its subfolder and starting by FileName.")
+    print("The output is in statJobs.txt.")
+    print("Usage:")
+    # The script name shown here must match the installed file name,
+    # otherwise the suggested command cannot be copy-pasted.
+    print("python computeEnlapsedTime.py ParentFolderName FileName")
+    print("eg: python computeEnlapsedTime.py spect_phs_build gate")
+
+def is_number(s):
+    """Tell whether ``s`` can be converted to an integer with ``int()``.
+
+    Returns True when ``int(s)`` succeeds and False when it raises
+    ValueError. Other exceptions (for example TypeError for ``None``)
+    are not handled and propagate to the caller.
+    """
+    try:
+        int(s)
+    except ValueError:
+        # Return an explicit boolean instead of falling through to None.
+        return False
+    return True
+
+def createDate(s):
+    """Build a datetime from the date fragments found in a text line.
+
+    The line is split on single spaces and every word is inspected:
+
+    * an integer greater than 2000 sets the year,
+    * an integer between 1 and 31 sets the day,
+    * a three-letter English month abbreviation ("Jan" ... "Dec") sets the month,
+    * a word containing exactly two ':' is read as hour:minute:second.
+
+    Fields that are not found keep the default 2000-01-01 00:00:00.
+    Other words (weekday names, labels, other numbers) are ignored.
+
+    Returns the resulting ``datetime.datetime``. A ValueError is raised when
+    the fragments do not form a valid date or time.
+    """
+    months = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
+              "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}
+    parsedDate = datetime.datetime(year=2000, month=1, day=1, hour=0, minute=0, second=0)
+    for word in s.split(' '):
+        try:
+            number = int(word)
+        except ValueError:
+            number = None
+
+        if number is not None:
+            if number > 2000:
+                parsedDate = parsedDate.replace(year=number)
+            # Only 1..31 can be a day; 0 or a negative number would make
+            # datetime.replace() raise, so such numbers are ignored.
+            elif 1 <= number < 32:
+                parsedDate = parsedDate.replace(day=number)
+        elif word in months:
+            parsedDate = parsedDate.replace(month=months[word])
+        elif word.count(':') == 2:
+            times = word.split(':')
+            parsedDate = parsedDate.replace(hour=int(times[0]),
+                                            minute=int(times[1]),
+                                            second=int(times[2]))
+    return parsedDate
+
+def computeEnlapsedTime():
+    """Gather job timestamps below a folder and write statistics to statJobs.txt.
+
+    The parent folder is read from ``sys.argv[1]`` and the file-name prefix
+    from ``sys.argv[2]``. Every file under the folder (recursively) whose name
+    starts with the prefix is scanned; lines containing "Submitted", "Started"
+    or "Ended" are turned into dates with ``createDate``.
+
+    For each job the computation time is ``ended - started`` and the value
+    reported as "waiting time" is ``ended - submitted``. The efficiency is
+    the ratio of the two in percent, and the speed up is the mean computation
+    time multiplied by the number of matching files, divided by the largest
+    waiting time.
+
+    A file whose numbers of "Submitted", "Started" and "Ended" lines differ
+    (for instance a job that has not ended) is skipped with a message on
+    standard error, so that the three lists stay aligned job by job.
+
+    The report is written to ``statJobs.txt`` in the current directory. When
+    no complete timing is found, only the job count and a short notice are
+    written.
+    """
+    #Get the folder name and the filename
+    folderName = sys.argv[1]
+    fileName = sys.argv[2]
+
+    #Look for all files starting with the fileName
+    filePaths = []
+    for root, dirs, files in os.walk(folderName):
+        for name in files:
+            if name.startswith(fileName):
+                filePaths.append(os.path.join(root, name))
+
+    #For each file into filePaths, save the submitted, started and ended time
+    submittedTime = []
+    startedTime = []
+    endedTime = []
+    for path in filePaths:
+        submitted = []
+        started = []
+        ended = []
+        #The with statement closes the file even if parsing fails
+        with open(path) as log:
+            for line in log:
+                if "Submitted" in line:
+                    submitted.append(createDate(line))
+                if "Started" in line:
+                    started.append(createDate(line))
+                if "Ended" in line:
+                    ended.append(createDate(line))
+        #The lists are paired by position below; a file with a missing
+        #marker would shift the pairing of every following job
+        if not (len(submitted) == len(started) == len(ended)):
+            sys.stderr.write("Skipping " + path + ": incomplete timing information\n")
+            continue
+        submittedTime += submitted
+        startedTime += started
+        endedTime += ended
+
+    #Write the output
+    with open('statJobs.txt', 'w') as output:
+        output.write(str(len(filePaths)) + " run jobs\n\n")
+
+        #min() and max() fail on empty lists, nothing more can be reported
+        if not submittedTime:
+            output.write("No complete timing information found\n")
+            return
+
+        #Computation time
+        computationTime = [a - b for a, b in zip(endedTime, startedTime)]
+        computationTimeSecond = numpy.array([time.total_seconds() for time in computationTime])
+        waitingTime = [a - b for a, b in zip(endedTime, submittedTime)]
+        waitingTimeSecond = numpy.array([time.total_seconds() for time in waitingTime])
+        computePart = computationTimeSecond / waitingTimeSecond * 100.0
+
+        output.write("Starting submission at " + str(min(submittedTime)) + "\n")
+        output.write("Finishing submission at " + str(max(submittedTime)) + "\n\n")
+        output.write("First job started computation at " + str(min(startedTime)) + "\n")
+        output.write("Last job started computation at " + str(max(startedTime)) + "\n\n")
+        output.write("First job ended computation at " + str(min(endedTime)) + "\n")
+        output.write("Last job ended computation at " + str(max(endedTime)) + "\n\n")
+        output.write("Min computation time " + str(min(computationTime)) + "\n")
+        output.write("Max computation time " + str(max(computationTime)) + "\n")
+        output.write("Mean computation time " + str(datetime.timedelta(seconds=numpy.mean(computationTimeSecond))) + "\n")
+        output.write("Std computation time " + str(datetime.timedelta(seconds=numpy.std(computationTimeSecond))) + "\n\n")
+        output.write("Min waiting time " + str(min(waitingTime)) + "\n")
+        output.write("Max waiting time " + str(max(waitingTime)) + "\n")
+        output.write("Mean waiting time " + str(datetime.timedelta(seconds=numpy.mean(waitingTimeSecond))) + "\n")
+        output.write("Std waiting " + str(datetime.timedelta(seconds=numpy.std(waitingTimeSecond))) + "\n\n")
+        output.write("Min efficiency "  + str(min(computePart)) + " %\n")
+        output.write("Max efficiency " + str(max(computePart)) + " %\n")
+        output.write("Mean efficiency " + str(numpy.mean(computePart)) + " %\n")
+        output.write("Std efficiency " + str(numpy.std(computePart)) + " %\n\n")
+        output.write("Speed up " + str(numpy.mean(computationTimeSecond)*len(filePaths)/max(waitingTimeSecond)) + "\n\n")
+
+    return
+
+if __name__ == "__main__":
+    # Exactly two arguments are required (folder and file-name prefix);
+    # anything else prints the help text instead of running.
+    if len(sys.argv) == 3:
+        computeEnlapsedTime()
+    else:
+        usage()
index 767fb2f6d2d70e063d4e0d8b0539f29ff1a66be4..8fa4cfc3b15935562fc7c5b898e22ae020f6755d 100755 (executable)
@@ -595,6 +595,11 @@ do
     merge_dispatcher_uncertainty "${outputfile}" "${force}"
 done
 
+echo "compute job statistics"
+# NOTE(review): the script path is relative to the current directory; confirm
+# it is reachable from where this script runs (it is installed into bin).
+python computeEnlapsedTime.py "${rundir}" gate
+mv "statJobs.txt" "${outputdir}/statJobs.txt"
+
+
 if [ -f "${rundir}/params.txt" ]
 then
     echo "copying params file"