Skip to content
Snippets Groups Projects
Commit 80e8ee4f authored by Klaus Rabbertz's avatar Klaus Rabbertz
Browse files

Work on event rate summary plot

parent a026f022
Branches
Tags
No related merge requests found
......@@ -4,6 +4,8 @@
import glob
import argparse
import glob
import os
import re
import sys
import matplotlib as mpl
import matplotlib.gridspec as gridspec
......@@ -51,6 +53,14 @@ import matplotlib.pyplot as plt
# numpy
import numpy as np
# Redefine ScalarFormatter
class ScalarFormatterForceFormat(ScalarFormatter):
# Override function that finds format to use.
def _set_format(self, vmin, vmax):
self.format = "%1.2f" # Give format here
# Action class to allow comma-separated (or empty) list in options
class SplitArgs(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
......@@ -62,6 +72,9 @@ class SplitArgs(argparse.Action):
# Some global definitions
_debug = False
_formats = {'eps': 0, 'pdf': 1, 'png': 2, 'svg': 3}
_channels = ['LO', 'R', 'V', 'RRa', 'RRb', 'RV', 'VV', 'ALL']
_channel_number = {'LO': 0, 'R': 1, 'V': 2, 'RRa': 3, 'RRb': 4, 'RV': 5, 'VV': 6}
_channel_colors = ['tab:green', 'tab:cyan', 'tab:blue', 'tab:red', 'tab:orange', 'tab:pink', 'tab:purple']
#####################################################################################
......@@ -77,7 +90,16 @@ def main():
args = arguments()
# extract correct paths for input and outputfiles
logfiles = get_files(args['logfiles'])
files = get_files(args['logfiles'])
logfiles = []
runfiles = []
for file in files:
runfile = re.sub('.log$', '.run', file)
if not os.path.isfile(runfile):
print('[fastnnlo_runtime]: WARNING! No matching runcard found for log file {}, skipped!')
else:
logfiles.append(file)
runfiles.append(runfile)
outputpath = args['output']
print('[fastnnlo_runtime]: Output path argument is: {}'.format(outputpath))
outputname = args['filename']
......@@ -100,15 +122,17 @@ def main():
# get all the information from logfiles as dict
# dict contains: runtime, runtime_unit, channel, events
loginformation = get_loginformation(logfiles)
runinformation = get_runinformation(runfiles)
info = {**loginformation, **runinformation}
# plot all the information
if args['CPUtime']:
plot_elapsed_time(loginformation, outputpath, outputname, formats)
plot_elapsed_time(info, outputpath, outputname, formats)
if args['Events']:
plot_events_per_hour(loginformation, outputpath, outputname, formats)
plot_events_per_hour(info, outputpath, outputname, formats)
if not args['CPUtime'] and not args['Events']:
plot_elapsed_time(loginformation, outputpath, outputname, formats)
plot_events_per_hour(loginformation, outputpath, outputname, formats)
plot_elapsed_time(info, outputpath, outputname, formats)
plot_events_per_hour(info, outputpath, outputname, formats)
exit(0)
......@@ -143,17 +167,16 @@ def get_files(files):
print('fastnnlo_runtime: ERROR! Aborted, only one log file found: {}'.format(files[0]))
exit(3)
# sort unsorted glob list
files.sort()
return files
def get_loginformation(files):
run_time = []
number_events = []
channel = None
runtimes = []
for file in files:
event = False
run_time_temp = []
runtimes_temp = []
with open(file) as origin:
for line in origin:
......@@ -165,138 +188,182 @@ def get_loginformation(files):
seconds = float(line[3])
if hours != 0.:
run_time_temp.append(hours + minutes/60 + seconds/360)
runtimes_temp.append(hours + minutes/60 + seconds/360)
unit = 'hours'
else:
run_time_temp.append(minutes + seconds/60)
runtimes_temp.append(minutes + seconds/60)
unit = 'minutes'
# extract channel name
if 'Tablename' in line and not channel:
line = line.split()
tablename = line[2].split('.')
channel = tablename[0] + '.' + tablename[1]
# extract total events
if 'ncalltot=' in line and not event:
line = line.split(',')
number_events.append(float(line[4][10:]))
event = True
runtimes.append(runtimes_temp[-1])
runtimes = np.array(runtimes)
run_time.append(run_time_temp[-1])
information = {
'runtime': runtimes,
'runtime_unit': unit
}
return information
def get_runinformation(files):
nevents = []
channels = []
for file in files:
with open(file) as origin:
for line in origin:
# extract total events
if 'Number of events' in line:
line = line.split()
nevents.append(line[0])
if 'Job name id' in line:
line = line.split()
parts = line[0].split('-')
channels.append(parts[0])
run_time = np.array(run_time)
number_events = np.array(number_events)
nevents = np.array(nevents)
channels = np.array(channels)
information = {
'runtime': run_time,
'runtime_unit': unit,
'channel': channel,
'events': number_events
'events': nevents,
'channels': channels
}
return information
def plot_elapsed_time(informationdict, out_path, out_name, formats):
def plot_elapsed_time(infodict, out_path, out_name, formats):
time = informationdict['runtime']
unit = informationdict['runtime_unit']
channel = informationdict['channel']
basename = 'runtime'
times = infodict['runtime']
unit = infodict['runtime_unit']
channels = infodict['channels']
# get relevant values
mean = np.mean(time)
std = np.std(time)
median = np.median(time)
iqd = np.subtract(*np.percentile(time, [75, 25], interpolation='linear'))/2.
mean = np.mean(times)
std = np.std(times)
median = np.median(times)
iqd = np.subtract(*np.percentile(times, [75, 25], interpolation='linear'))/2.
CPUtime = np.sum(time) / (1 if unit == 'hours' else 60)
CPUtime = np.sum(times) / (1 if unit == 'hours' else 60)
# set figure
fig = plt.figure(figsize=(16, 12))
ax = fig.gca()
# plot histogram
n, batches, _ = ax.hist(time, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime))
n, batches, _ = ax.hist(times, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime))
# plot mean and median
ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1f}$\pm${2:0.1f} {1}'.format(mean, unit, std))
ax.vlines(median, 0, max(n), colors='green', linestyles='dashed', label=r'Median: {0:0.1f}$\pm${2:0.1f} {1}'.format(median, unit, iqd))
ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median: {0:0.1f}$\pm${2:0.1f} {1}'.format(median, unit, iqd))
# finish and save figure
chnlabel = channel
chnlabel = channels[0]
if out_name:
chnlabel = out_name
ax.set_title('Elapsed time of ' + chnlabel + ' production', fontsize=20)
ax.set_xlabel('CPU time [' + unit + ']', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
ax.set_ylabel('frequency', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
ax.set_ylabel('# jobs', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20, labelpad=20)
ax.set_yscale('log')
ax.tick_params(axis='both', which='major', labelsize=20)
ax.ticklabel_format(axis='x', style='plain', useOffset=False)
ax.legend(loc='best', fontsize=20)
ax.grid()
ax.set_axisbelow(True)
# set saving location
basename = 'runtime'
for fmt in formats:
filename = out_path + ('' if out_path[-1] == '/' else '/')
if out_name:
filename += out_name + '.' + basename + '.' + fmt
else:
filename += channel + '.' + basename + '.' + fmt
filename += channels[0] + '.' + basename + '.' + fmt
print('[fastnnlo_runtime]: Saving runtime plot {}'.format(filename))
fig.savefig(filename)
def plot_events_per_hour(informationdict, out_path, out_name, formats):
time = informationdict['runtime']
unit = informationdict['runtime_unit']
channel = informationdict['channel']
events = informationdict['events']
basename = 'evtrate'
if unit == 'hours':
eph = events/time
else:
eph = events/(time/60)
def plot_events_per_hour(infodict, out_path, out_name, formats):
# get input
channels = infodict['channels']
events = infodict['events']
times = infodict['runtime']
unit = infodict['runtime_unit']
# prepare input
unique_channels = set(channels)
if len(unique_channels) == 0:
print('fastnnlo_runtime: ERROR! Aborted, no channel info found.')
exit(11)
eph = []
for i, time in enumerate(times):
if unit == 'hours':
eph.append(float(events[i])/time)
else:
eph.append(float(events[i])/(time/60))
ephchn = []
for i, val in enumerate(eph):
for j, chn in enumerate(_channels):
if channels[i] == chn:
ephchn.append([val, j])
ephchn = np.array(ephchn)
masks = []
for i, chn in enumerate(_channels):
masks.append(ephchn[:,1] == i)
# get relevant values
mean = np.mean(eph)
std = np.std(eph)
median = np.median(eph)
iqd = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2.
CPUtime = np.sum(time) / (1 if unit == 'hours' else 60)
# set figure
mean = np.mean(eph)
std = np.std(eph)
median = np.median(eph)
iqd = np.subtract(*np.percentile(eph, [75, 25], interpolation='linear'))/2.
ephmin = np.min(eph)
ephmax = np.max(eph)
logbins = np.geomspace(ephmin, ephmax, 100)
CPUtime = np.sum(times) / (1 if unit == 'hours' else 60)
# create figure
fig = plt.figure(figsize=(16, 12))
ax = fig.gca()
# plot histogram
n, batches, _ = ax.hist(eph, bins=20, color='deepskyblue', edgecolor='black', label='Total CPU time: {0:0.0f} hours'.format(CPUtime))
# plot mean and median
ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1e}$\pm${1:0.1e} events/hour'.format(mean, std))
ax.vlines(median, 0, max(n), colors='green', linestyles='dashed', label=r'Median: {0:0.2e}$\pm${1:0.2e} events/hour'.format(median, iqd))
# plot (multistack-)histogram
evrs = []
lastch = 'LO'
for chn in _channels:
if chn in unique_channels:
evrs.append(ephchn[masks[_channel_number[chn]]][:,0])
lastch = chn
if len(unique_channels) == 1:
n, batches, _ = ax.hist(evrs, log=True, bins=20, edgecolor='black', color=_channel_colors[_channel_number[lastch]], label='Total CPU time: {0:0.0f} hours'.format(CPUtime))
# plot mean and median
ax.vlines(mean, 0, max(n), colors='red', linestyles='dashed', label=r'Mean: {0:0.1e}$\pm${1:0.1e} events/hour'.format(mean, std))
ax.vlines(median, 0, max(n), colors='green', linestyles='dashdot', label=r'Median: {0:0.2e}$\pm${1:0.2e} events/hour'.format(median, iqd))
ax.ticklabel_format(axis='x', style='plain', useOffset=False)
else:
n, batches, _ = ax.hist(evrs, histtype='barstacked', log=True, stacked=True, bins=logbins, edgecolor='black', color=_channel_colors, label=_channels)
ax.set_xlim(0.9*ephmin, 1.1*ephmax)
ax.set_xscale('log')
# finish and save figure
ax.set_title('Event rate of ' + channel + ' production', fontsize=20)
chnlabel = channels[0]
if out_name:
chnlabel = out_name
ax.set_title('Event rate of ' + chnlabel + ' production', fontsize=20)
ax.set_xlabel('event rate [1/hour]', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
ax.set_ylabel('frequency', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20)
ax.set_yscale('log')
ax.set_ylabel('# jobs', horizontalalignment='right', x=1.0, verticalalignment='top', y=1.0, fontsize=20, labelpad=20)
ax.tick_params(axis='both', which='major', labelsize=20)
ax.legend(loc='best', fontsize=20)
ax.grid()
ax.set_axisbelow(True)
# set saving location
basename = 'evtrate'
for fmt in formats:
filename = out_path + ('' if out_path[-1] == '/' else '/')
if out_name:
filename += out_name + '.' + basename + '.' + fmt
else:
filename += channel + '.' + basename + '.' + fmt
filename += channels[0] + '.' + basename + '.' + fmt
print('[fastnnlo_runtime]: Saving event rate plot {}'.format(filename))
fig.savefig(filename)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment