diff --git a/tools/plotting/fastnnlo_runtime.py b/tools/plotting/fastnnlo_runtime.py index af85cc43fc69c4aff7c74de11a6a0819a12d8d79..17a803d6c947ac00bff0b768377af0ca3caecc56 100755 --- a/tools/plotting/fastnnlo_runtime.py +++ b/tools/plotting/fastnnlo_runtime.py @@ -4,6 +4,8 @@ import glob import argparse import glob +import os +import re import sys import matplotlib as mpl import matplotlib.gridspec as gridspec @@ -51,6 +53,14 @@ import matplotlib.pyplot as plt # numpy import numpy as np + +# Redefine ScalarFormatter +class ScalarFormatterForceFormat(ScalarFormatter): + # Override function that finds format to use. + def _set_format(self, vmin, vmax): + self.format = "%1.2f" # Give format here + + # Action class to allow comma-separated (or empty) list in options class SplitArgs(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): @@ -62,6 +72,9 @@ class SplitArgs(argparse.Action): # Some global definitions _debug = False _formats = {'eps': 0, 'pdf': 1, 'png': 2, 'svg': 3} +_channels = ['LO', 'R', 'V', 'RRa', 'RRb', 'RV', 'VV', 'ALL'] +_channel_number = {'LO': 0, 'R': 1, 'V': 2, 'RRa': 3, 'RRb': 4, 'RV': 5, 'VV': 6} +_channel_colors = ['tab:green', 'tab:cyan', 'tab:blue', 'tab:red', 'tab:orange', 'tab:pink', 'tab:purple'] ##################################################################################### @@ -77,7 +90,16 @@ def main(): args = arguments() # extract correct paths for input and outputfiles - logfiles = get_files(args['logfiles']) + files = get_files(args['logfiles']) + logfiles = [] + runfiles = [] + for file in files: + runfile = re.sub('.log$', '.run', file) + if not os.path.isfile(runfile): + print('[fastnnlo_runtime]: WARNING! No matching runcard found for log file {}, skipped!') + else: + logfiles.append(file) + runfiles.append(runfile) outputpath = args['output'] print('[fastnnlo_runtime]: Output path argument is: {}'.format(outputpath)) outputname = args['filename'] @@ -100,15 +122,17 @@ def main(): # get all the information from logfiles as dict # dict contains: runtime, runtime_unit, channel, events loginformation = get_loginformation(logfiles) + runinformation = get_runinformation(runfiles) + info = {**loginformation, **runinformation} # plot all the information if args['CPUtime']: - plot_elapsed_time(loginformation, outputpath, outputname, formats) + plot_elapsed_time(info, outputpath, outputname, formats) if args['Events']: - plot_events_per_hour(loginformation, outputpath, outputname, formats) + plot_events_per_hour(info, outputpath, outputname, formats) if not args['CPUtime'] and not args['Events']: - plot_elapsed_time(loginformation, outputpath, outputname, formats) - plot_events_per_hour(loginformation, outputpath, outputname, formats) + plot_elapsed_time(info, outputpath, outputname, formats) + plot_events_per_hour(info, outputpath, outputname, formats) exit(0) @@ -143,17 +167,16 @@ def get_files(files): print('fastnnlo_runtime: ERROR! Aborted, only one log file found: {}'.format(files[0])) exit(3) + # sort unsorted glob list + files.sort() return files def get_loginformation(files): - run_time = [] - number_events = [] - channel = None + runtimes = [] for file in files: - event = False - run_time_temp = [] + runtimes_temp = [] with open(file) as origin: for line in origin: @@ -165,138 +188,182 @@ def get_loginformation(files): seconds = float(line[3]) if hours != 0.: - run_time_temp.append(hours + minutes/60 + seconds/360) + runtimes_temp.append(hours + minutes/60 + seconds/360) unit = 'hours' else: - run_time_temp.append(minutes + seconds/60) + runtimes_temp.append(minutes + seconds/60) unit = 'minutes' - # extract channel name - if 'Tablename' in line and not channel: - line = line.split() - tablename = line[2].split('.') - channel = tablename[0] + '.' + tablename[1] - # extract total events - if 'ncalltot=' in line and not event: - line = line.split(',') - number_events.append(float(line[4][10:])) - event = True + runtimes.append(runtimes_temp[-1]) + + runtimes = np.array(runtimes) - run_time.append(run_time_temp[-1]) + information = { + 'runtime': runtimes, + 'runtime_unit': unit + } + return information + +def get_runinformation(files): + + nevents = [] + channels = [] + + for file in files: + with open(file) as origin: + for line in origin: + # extract total events + if 'Number of events' in line: + line = line.split() + nevents.append(line[0]) + if 'Job name id' in line: + line = line.split() + parts = line[0].split('-') + channels.append(parts[0]) - run_time = np.array(run_time) - number_events = np.array(number_events) + nevents = np.array(nevents) + channels = np.array(channels) information = { - 'runtime': run_time, - 'runtime_unit': unit, - 'channel': channel, - 'events': number_events + 'events': nevents, + 'channels': channels } return information -def plot_elapsed_time(informationdict, out_path, out_name, formats): +def plot_elapsed_time(infodict, out_path, out_name, formats): - time = informationdict['runtime'] - unit = informationdict['runtime_unit'] - channel = informationdict['channel'] - basename = 'runtime' + times = infodict['runtime'] + unit = infodict['runtime_unit'] + channels = infodict['channels'] # get relevant values - mean = np.mean(time) - std = np.std(time) - median = np.median(time) - iqd = np.subtract(*np.percentile(time, [75, 25], interpolation='linear'))/2. + mean = np.mean(times) + std = np.std(times) + median = np.median(times) + iqd = np.subtract(*np.percentile(times, [75, 25], interpolation='linear'))/2. - CPUtime = np.sum(time) / (1 if unit == 'hours' else 60) + CPUtime = np.sum(times) / (1 if unit == 'hours' else 60) # set figure fig = plt.figure(figsize=(16, 12)) ax = fig.gca() # plot histogram - n, batches, _ = ax.hist(time, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime)) + n, batches, _ = ax.hist(times, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime)) # plot mean and median ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1f}$\pm${2:0.1f} {1}'.format(mean, unit, std)) - ax.vlines(median, 0, max(n), colors='green', linestyles='dashed', label=r'Median: {0:0.1f}$\pm${2:0.1f} {1}'.format(median, unit, iqd)) + ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median: {0:0.1f}$\pm${2:0.1f} {1}'.format(median, unit, iqd)) # finish and save figure - chnlabel = channel + chnlabel = channels[0] if out_name: chnlabel = out_name ax.set_title('Elapsed time of ' + chnlabel + ' production', fontsize=20) ax.set_xlabel('CPU time [' + unit + ']', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20) - ax.set_ylabel('frequency', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20) + ax.set_ylabel('# jobs', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20, labelpad=20) ax.set_yscale('log') ax.tick_params(axis='both', which='major', labelsize=20) + ax.ticklabel_format(axis='x', style='plain', useOffset=False) ax.legend(loc='best', fontsize=20) ax.grid() ax.set_axisbelow(True) # set saving location + basename = 'runtime' for fmt in formats: filename = out_path + ('' if out_path[-1] == '/' else '/') if out_name: filename += out_name + '.' + basename + '.' + fmt else: - filename += channel + '.' + basename + '.' + fmt + filename += channels[0] + '.' + basename + '.' + fmt print('[fastnnlo_runtime]: Saving runtime plot {}'.format(filename)) fig.savefig(filename) -def plot_events_per_hour(informationdict, out_path, out_name, formats): - - time = informationdict['runtime'] - unit = informationdict['runtime_unit'] - channel = informationdict['channel'] - events = informationdict['events'] - basename = 'evtrate' - - if unit == 'hours': - eph = events/time - else: - eph = events/(time/60) +def plot_events_per_hour(infodict, out_path, out_name, formats): + + # get input + channels = infodict['channels'] + events = infodict['events'] + times = infodict['runtime'] + unit = infodict['runtime_unit'] + + # prepare input + unique_channels = set(channels) + if len(unique_channels) == 0: + print('fastnnlo_runtime: ERROR! Aborted, no channel info found.') + exit(11) + eph = [] + for i, time in enumerate(times): + if unit == 'hours': + eph.append(float(events[i])/time) + else: + eph.append(float(events[i])/(time/60)) + ephchn = [] + for i, val in enumerate(eph): + for j, chn in enumerate(_channels): + if channels[i] == chn: + ephchn.append([val, j]) + ephchn = np.array(ephchn) + masks = [] + for i, chn in enumerate(_channels): + masks.append(ephchn[:,1] == i) # get relevant values - mean = np.mean(eph) - std = np.std(eph) - median = np.median(eph) - iqd = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2. - - CPUtime = np.sum(time) / (1 if unit == 'hours' else 60) - - # set figure + mean = np.mean(eph) + std = np.std(eph) + median = np.median(eph) + iqd = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2. + ephmin = np.min(eph) + ephmax = np.max(eph) + logbins = np.geomspace(ephmin, ephmax, 100) + CPUtime = np.sum(times) / (1 if unit == 'hours' else 60) + + # create figure fig = plt.figure(figsize=(16, 12)) ax = fig.gca() - # plot histogram - n, batches, _ = ax.hist(eph, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime)) - - # plot mean and median - ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1e}$\pm${1:0.1e} events/hour'.format(mean, std)) - ax.vlines(median, 0, max(n), colors='green', linestyles='dashed', label=r'Median: {0:0.2e}$\pm${1:0.2e} events/hour'.format(median, iqd)) + # plot (multistack-)histogram + evrs = [] + lastch = 'LO' + for chn in _channels: + if chn in unique_channels: + evrs.append(ephchn[masks[_channel_number[chn]]][:,0]) + lastch = chn + if len(unique_channels) == 1: + n, batches, _ = ax.hist(evrs, log=True, bins=20, edgecolor='black', color=_channel_colors[_channel_number[lastch]], label='Total CPU time: {0:0.0f} hours'.format(CPUtime)) + # plot mean and median + ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1e}$\pm${1:0.1e} events/hour'.format(mean, std)) + ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median: {0:0.2e}$\pm${1:0.2e} events/hour'.format(median, iqd)) + ax.ticklabel_format(axis='x', style='plain', useOffset=False) + else: + n, batches, _ = ax.hist(evrs, histtype='barstacked', log=True, stacked=True, bins=logbins, edgecolor='black', color=_channel_colors, label=_channels) + ax.set_xlim(0.9*ephmin, 1.1*ephmax) + ax.set_xscale('log') # finish and save figure - ax.set_title('Event rate of ' + channel + ' production', fontsize=20) + chnlabel = channels[0] + if out_name: + chnlabel = out_name + ax.set_title('Event rate of ' + chnlabel + ' production', fontsize=20) ax.set_xlabel('event rate [1/hour]', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20) - ax.set_ylabel('frequency', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20) - ax.set_yscale('log') + ax.set_ylabel('# jobs', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20, labelpad=20) ax.tick_params(axis='both', which='major', labelsize=20) - ax.legend(loc='best', fontsize=20) ax.grid() ax.set_axisbelow(True) # set saving location + basename = 'evtrate' for fmt in formats: filename = out_path + ('' if out_path[-1] == '/' else '/') if out_name: filename += out_name + '.' + basename + '.' + fmt else: - filename += channel + '.' + basename + '.' + fmt + filename += channels[0] + '.' + basename + '.' + fmt print('[fastnnlo_runtime]: Saving event rate plot {}'.format(filename)) fig.savefig(filename)