Extend runtime plots to use the number-of-cores information

a94c8df8 · Klaus Rabbertz · 0cac6847 · a94c8df8
Commit a94c8df8 authored 1 year ago by Klaus Rabbertz
--- a/tools/plotting/fastnnlo_runtime.py
+++ b/tools/plotting/fastnnlo_runtime.py
@@ -120,7 +120,7 @@ def main():
            exit(1)

    # get all the information from logfiles as dict
-    # dict contains: runtime, runtime_unit, channel, events
+    # dict contains: runtimes, runtime_unit, numcores; channels, events
    loginformation = get_loginformation(logfiles)
    runinformation = get_runinformation(runfiles)
    info = {**loginformation, **runinformation}
@@ -175,10 +175,12 @@ def get_loginformation(files):

    # always in hours for simplicity
    runtimes = []
+    numcores = []
    unit = 'hours'

    for file in files:
        runtimes_temp = []
+        numcores_temp = []

        with open(file) as origin:
            for line in origin:
@@ -190,13 +192,20 @@ def get_loginformation(files):
                    seconds = float(line[3])
                    runtimes_temp.append(hours + minutes/60 + seconds/3600)

+                if 'Allocated number of threads' in line:
+                    line = line.split(':')
+                    numcores_temp.append(int(line[1]))
+
        runtimes.append(runtimes_temp[-1])
+        numcores.append(numcores_temp[-1])

    runtimes = np.array(runtimes)
+    numcores = np.array(numcores)

    information = {
-        'runtime': runtimes,
-        'runtime_unit': unit
+        'runtimes': runtimes,
+        'runtime_unit': unit,
+        'numcores': numcores
    }

    return information
@@ -251,8 +260,9 @@ def get_runinformation(files):

 def plot_elapsed_time(infodict, out_path, out_name, formats):

-    times = infodict['runtime']
-    unit  = infodict['runtime_unit']
+    times    = infodict['runtimes']
+    unit     = infodict['runtime_unit']
+    cores    = infodict['numcores']
    channels = infodict['channels']
    events   = infodict['events']

@@ -262,6 +272,8 @@ def plot_elapsed_time(infodict, out_path, out_name, formats):
        print('fastnnlo_runtime: ERROR! Aborted, no channel info found.')
        exit(11)

+    cputimes = np.multiply(times,cores)
+
    bins = np.linspace(min(times), max(times), 100)
    # get relevant values
    mean = np.mean(times)
@@ -271,7 +283,7 @@ def plot_elapsed_time(infodict, out_path, out_name, formats):
    # In future?:
    #    iqd = np.subtract(*np.percentile(times, [75, 25], method='linear'))/2.

-    CPUtime = np.sum(times)
+    CPUtime = np.sum(cputimes)

    # set figure
    fig = plt.figure(figsize=(16, 12))
@@ -280,26 +292,27 @@ def plot_elapsed_time(infodict, out_path, out_name, formats):
    # plot histogram

    if len(unique_channels) > 1 or [unique_channels] == 'ALL':
-        n, batches, _ = ax.hist(times, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime))
+        n, batches, _ = ax.hist(times, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} h'.format(CPUtime))
        ax.legend(loc='best', fontsize=20)
    else:
        plt.text
        # plot each unique number in different color
        for ev_num in set(events):
-            n, batches, _ = ax.hist(times[events == ev_num], histtype='barstacked', log=True, stacked=True, bins=bins, edgecolor='black', label='# Events: {}'.format(ev_num))
+            ncore = np.mean(cores[events == ev_num])
+            n, batches, _ = ax.hist(times[events == ev_num], histtype='barstacked', log=True, stacked=True, bins=bins, edgecolor='black', label='#Events@Cores: {} @ {}'.format(ev_num,ncore))

        # plot mean and median
-        ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1f}$\pm${1:0.1f}'.format(mean, std))
-        ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median: {0:0.2f}$\pm${1:0.2f}'.format(median, iqd))
+        ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean run time: {0:0.1f}$\pm${1:0.1f} h'.format(mean, std))
+        ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median run time: {0:0.2f}$\pm${1:0.2f} h'.format(median, iqd))
        ax.ticklabel_format(axis='x', style='plain', useOffset=False)
-        ax.legend(title='Total CPU time: {0:0.0f}hours'.format(CPUtime), loc='best', fontsize=20, title_fontsize=20)
+        ax.legend(title='Total CPU time: {0:0.0f} h'.format(CPUtime), loc='best', fontsize=20, title_fontsize=20)

    # finish and save figure
    chnlabel = channels[0]
    if out_name:
        chnlabel = out_name
-    ax.set_title('Elapsed time of ' + chnlabel + ' production', fontsize=20)
-    ax.set_xlabel('CPU time [' + unit + ']', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
+    ax.set_title('Run times of ' + chnlabel + ' production', fontsize=20)
+    ax.set_xlabel('Job run time [h]', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
    ax.set_ylabel('# jobs', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20, labelpad=20)
    ax.set_yscale('log')
    ax.tick_params(axis='both', which='major', labelsize=20)
@@ -323,36 +336,44 @@ def plot_events_per_hour(infodict, out_path, out_name, formats):
    # get input
    channels = infodict['channels']
    events   = infodict['events']
-    times    = infodict['runtime']
+    cores    = infodict['numcores']
+    times    = infodict['runtimes']
    unit     = infodict['runtime_unit']

    # prepare input
+    cputimes = np.multiply(times,cores)
    unique_channels = set(channels)
    if len(unique_channels) == 0:
        print('fastnnlo_runtime: ERROR! Aborted, no channel info found.')
        exit(11)
    eph = []
+    ncr = []
+    #    for i, time in enumerate(cputimes): # This would be per hour & core!
    for i, time in enumerate(times):
        eph.append(float(events[i])/time)
+        ncr.append(float(cores[i]))
    ephchn = []
+    ncrchn = []
    for i, val in enumerate(eph):
        for j, chn in enumerate(_channels):
            if channels[i] == chn:
                ephchn.append([val, j])
+                ncrchn.append([ncr[i], j])
    ephchn = np.array(ephchn)
+    ncrchn = np.array(ncrchn)
    masks = []
    for i, chn in enumerate(_channels):
        masks.append(ephchn[:,1] == i)

    # get relevant values
-    mean    = np.mean(eph)
-    std     = np.std(eph)
-    median  = np.median(eph)
-    iqd     = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2.
-    ephmin  = np.min(eph)
-    ephmax  = np.max(eph)
-    logbins = np.geomspace(ephmin, ephmax, 100)
-    CPUtime = np.sum(times)
+    mean     = np.mean(eph)
+    std      = np.std(eph)
+    median   = np.median(eph)
+    iqd      = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2.
+    ephmin   = np.min(eph)
+    ephmax   = np.max(eph)
+    logbins  = np.geomspace(ephmin, ephmax, 100)
+    CPUtime  = np.sum(times)

    # create figure
    fig = plt.figure(figsize=(16, 12))
@@ -360,11 +381,12 @@ def plot_events_per_hour(infodict, out_path, out_name, formats):

    # plot (multistack-)histogram
    evrs = []
+    ncrs = []
    for chn in _channels:
        if chn in unique_channels:
            evrs.append(ephchn[masks[_channel_number[chn]]][:,0])
+            ncrs.append(ncrchn[masks[_channel_number[chn]]][:,0])
    if len(unique_channels) == 1:
-
        # plot each unique number in different color
        for ev_num in set(events):
            n, batches, _ = ax.hist(evrs[0][events == ev_num], histtype='barstacked', log=True, stacked=True, bins=50, edgecolor='black', label='# Events: {}'.format(ev_num))
@@ -376,9 +398,9 @@ def plot_events_per_hour(infodict, out_path, out_name, formats):
    else:
        chnlab = []
        chncol = []
-        for chn in _channels:
+        for i, chn in enumerate(_channels):
            if chn in unique_channels:
-                chnlab.append(chn)
+                chnlab.append(chn+' @ '+str(np.mean(ncrs[i]))+' core(s)')
                chncol.append(_channel_colors[_channel_number[chn]])
        n, batches, _ = ax.hist(evrs, histtype='barstacked', log=True, stacked=True, bins=logbins, edgecolor='black', color=chncol, label=chnlab)
        ax.set_xlim(0.9*ephmin, 1.1*ephmax)