From da2fe86d50849181dfa5b127d2433a1356fa7201 Mon Sep 17 00:00:00 2001 From: Klaus Rabbertz <klaus.rabbertz@cern.ch> Date: Tue, 30 Jan 2024 10:01:06 +0100 Subject: [PATCH] Update nnlojet-combine from version 29.05.2020 to 22.12.2023 --- tools/nnlojet/nnlojet-combine.py | 152 ++++++++++-------- tools/nnlojet/nnlojet_algo.py | 55 ++++++- tools/nnlojet/nnlojet_plot.py | 143 +++++++++++++++++ tools/nnlojet/nnlojet_util.py | 268 ++++++++++++++++++++++++------- 4 files changed, 485 insertions(+), 133 deletions(-) create mode 100755 tools/nnlojet/nnlojet_plot.py diff --git a/tools/nnlojet/nnlojet-combine.py b/tools/nnlojet/nnlojet-combine.py index dc7a772f..a770c051 100755 --- a/tools/nnlojet/nnlojet-combine.py +++ b/tools/nnlojet/nnlojet-combine.py @@ -8,22 +8,22 @@ import glob import re import ast import multiprocessing as mp -#import nnlojet_plot import logging from logging.handlers import QueueHandler, QueueListener from nnlojet_util import NNLOJETHistogram, NNLOJETContainer +import nnlojet_plot class Task_part(): # merging options and default values - _trim_threshold = 4.0 - _trim_max_frac = 0.05 - _kscan_maxdev_unwgt = None # needs another safety feature (inactive by default) - _kscan_nsteps = 3 - _kscan_maxdev_steps = 0.2 - _merge_weighted = True + _default_trim_threshold = 3.5 + _default_trim_max_frac = 0.01 + _default_kscan_maxdev_unwgt = None # needs another safety feature (inactive by default) + _default_kscan_nsteps = 3 + _default_kscan_maxdev_steps = 0.5 + _default_merge_weighted = True def __init__(self, **kwargs): self._nx = kwargs.get('nx', 3) @@ -32,6 +32,14 @@ class Task_part(): self._qweights = kwargs.get('weights', False) self._columns = kwargs.get('columns', None) self._rebin = kwargs.get('rebin', None) + self._cumulant = kwargs.get('cumulant', None) + # copy to instance variable to preserve between processes... + self._trim_threshold = Task_part._default_trim_threshold + self._trim_max_frac = Task_part._default_trim_max_frac + self._kscan_maxdev_unwgt = Task_part._default_kscan_maxdev_unwgt + self._kscan_nsteps = Task_part._default_kscan_nsteps + self._kscan_maxdev_steps = Task_part._default_kscan_maxdev_steps + self._merge_weighted = Task_part._default_merge_weighted def __str__(self): return '{} [{} file(s)]'.format(self._outfile, len(self._files)) @@ -42,13 +50,13 @@ class Task_part(): container = NNLOJETContainer(size=len(self._files), weights=self._qweights) for file in self._files: try: - container.append(NNLOJETHistogram(nx=self._nx, filename=file, columns=self._columns, rebin=self._rebin)) + container.append(NNLOJETHistogram(nx=self._nx, filename=file, columns=self._columns, rebin=self._rebin, cumulant=self._cumulant)) except ValueError as e: print(e) print("error reading file:", file) # start the merge - + outfile_unwgt = self._outfile[:-4] + '.UNWGT.dat' hist = container.merge() hist.write_to_file(outfile_unwgt) @@ -83,7 +91,7 @@ def process_parts_queue(parts_queue): parts_queue.task_done() -def read_APPLfast(wgt_file): +def read_weights(wgt_file): with open(wgt_file, 'rt') as file: hist_acc = NNLOJETHistogram() nx = 0 @@ -104,7 +112,7 @@ def read_APPLfast(wgt_file): def obs_generator(obs_list): cross_pattern = re.compile(r'.*cross.*') for obs_line in obs_list: - obs_parse = [ it.strip() for it in obs_line.split('>') ] + obs_parse = [it.strip() for it in obs_line.split('>')] if len(obs_parse) == 1: obs_in = obs_parse[0] obs_out = obs_parse[0] @@ -114,7 +122,19 @@ def obs_generator(obs_list): else: raise ValueError('invalid observables specification: {}'.format(obs_line)) nx = 0 if re.match(cross_pattern, obs_in) else 3 - yield (obs_line, obs_in, obs_out, nx) + #> do we have any cumulant labels? + obs_cum = None + obs_parse = obs_in.split() + if (len(obs_parse) == 2) and (nx == 3): + if obs_in == obs_out: + raise ValueError('cumulants need name override: {}'.format(obs_line)) + obs_in = obs_parse[0] + obs_cum = obs_parse[1] + nx = 1 + elif len(obs_parse) != 1: + raise ValueError('invalid observables specification: {}'.format(obs_line)) + # print([obs_line, obs_in, obs_out, nx, obs_cum]) + yield (obs_line, obs_in, obs_out, nx, obs_cum) def main(): @@ -126,9 +146,9 @@ def main(): # multi-process parser.add_argument('-j', '--jobs', type=int, nargs='?', action='store', default='1', help='Specifies the number of jobs to run simultaneously.') - # read in APPLfast weight table - parser.add_argument('--APPLfast', action='store', default=None, - help='Read in an APPLfast weight table and combine.') + # read in weights weight table + parser.add_argument('--weights', action='store', default=None, + help='Read in a weight table and combine.') # Parse the input arguments! args = parser.parse_args() @@ -136,10 +156,10 @@ def main(): config.optionxform = lambda option: option # do not convert to lower case config.read(args.config) - # read in APPLfast weight table? - wgt_file = args.APPLfast + # read in weight table? + wgt_file = args.weights if wgt_file is not None: - read_APPLfast(wgt_file) + read_weights(wgt_file) return cross_pattern = re.compile(r'.*cross.*') @@ -157,12 +177,12 @@ def main(): qrecursive = config.getboolean('Options', 'recursive', fallback=False) print("recursive search: {}".format('ON' if qrecursive else 'OFF')) - # # debug plots? - # qplot = config.getboolean('Options', 'plot', fallback=False) - # print("plotting: {}".format('ON' if qplot else 'OFF')) - # if qplot: - # outdir_plots = config.get('Paths', 'out_dir') + '/Plots' - # os.makedirs(outdir_plots, exist_ok=True) + # debug plots + qplot = config.getboolean('Options', 'plot', fallback=True) + print("plotting: {}".format('ON' if qplot else 'OFF')) + if qplot: + outdir_plots = config.get('Paths', 'out_dir') + '/Plots' + os.makedirs(outdir_plots, exist_ok=True) # output weight data for the combined files? qweights = config.getboolean('Options', 'weights', fallback=False) @@ -181,15 +201,15 @@ def main(): trim = ast.literal_eval(trim) if type(trim) is bool: if not trim: - Task_part._trim_threshold = None - Task_part._trim_max_frac = None + Task_part._default_trim_threshold = None + Task_part._default_trim_max_frac = None elif type(trim) is int or type(trim) is float: - Task_part._trim_threshold = trim - Task_part._trim_max_frac = None + Task_part._default_trim_threshold = trim + Task_part._default_trim_max_frac = None elif type(trim) is list or type(trim) is tuple: if len(trim) == 2: - Task_part._trim_threshold = trim[0] - Task_part._trim_max_frac = trim[1] + Task_part._default_trim_threshold = trim[0] + Task_part._default_trim_max_frac = trim[1] else: print("trim list invalid: {}".format(trim)) raise @@ -197,11 +217,11 @@ def main(): print("trim option invalid: {}".format(trim)) raise # digest settings - print("trim: ({},{})".format(Task_part._trim_threshold, Task_part._trim_max_frac)) + print("trim: ({},{})".format(Task_part._default_trim_threshold, Task_part._default_trim_max_frac)) # read optional flag to control weighted vs. unweighted average - Task_part._merge_weighted = config.getboolean('Options', 'weighted', fallback=True) - print("weighted: {}".format(Task_part._merge_weighted)) + Task_part._default_merge_weighted = config.getboolean('Options', 'weighted', fallback=True) + print("weighted: {}".format(Task_part._default_merge_weighted)) # read optional k-optimisation settings kscan = config.get('Options', 'k-scan', fallback=None) @@ -209,22 +229,22 @@ def main(): kscan = ast.literal_eval(kscan) if type(kscan) is bool: if not kscan: - Task_part._kscan_maxdev_unwgt = None - Task_part._kscan_nsteps = None - Task_part._kscan_maxdev_steps = None + Task_part._default_kscan_maxdev_unwgt = None + Task_part._default_kscan_nsteps = None + Task_part._default_kscan_maxdev_steps = None elif type(kscan) is int or type(kscan) is float: - Task_part._kscan_maxdev_unwgt = kscan - Task_part._kscan_nsteps = None - Task_part._kscan_maxdev_steps = None + Task_part._default_kscan_maxdev_unwgt = kscan + Task_part._default_kscan_nsteps = None + Task_part._default_kscan_maxdev_steps = None elif type(kscan) is list or type(kscan) is tuple: if len(kscan) == 2: - Task_part._kscan_maxdev_unwgt = None - Task_part._kscan_nsteps = kscan[0] - Task_part._kscan_maxdev_steps = kscan[1] + Task_part._default_kscan_maxdev_unwgt = None + Task_part._default_kscan_nsteps = kscan[0] + Task_part._default_kscan_maxdev_steps = kscan[1] elif len(kscan) == 3: - Task_part._kscan_maxdev_unwgt = kscan[0] - Task_part._kscan_nsteps = kscan[1] - Task_part._kscan_maxdev_steps = kscan[2] + Task_part._default_kscan_maxdev_unwgt = kscan[0] + Task_part._default_kscan_nsteps = kscan[1] + Task_part._default_kscan_maxdev_steps = kscan[2] else: print("k-scan list invalid: {}".format(kscan)) raise @@ -232,7 +252,7 @@ def main(): print("k-scan option invalid: {}".format(kscan)) raise # digest settings - print("k-scan: ({},{},{})".format(Task_part._kscan_maxdev_unwgt, Task_part._kscan_nsteps, Task_part._kscan_maxdev_steps)) + print("k-scan: ({},{},{})".format(Task_part._default_kscan_maxdev_unwgt, Task_part._default_kscan_nsteps, Task_part._default_kscan_maxdev_steps)) # set up the log file #@todo: use package `multiprocessing-logging` in the future? @@ -280,13 +300,13 @@ def main(): print(""" - + /////////// // Parts // /////////// """) -# for (obs_line, obs_in, obs_out, nx) in obs_generator(obs_set): - for (obs_line, obs_in, obs_out, nx) in obs_generator(obs_list): +# for (obs_line, obs_in, obs_out, nx, obs_cum) in obs_generator(obs_set): + for (obs_line, obs_in, obs_out, nx, obs_cum) in obs_generator(obs_list): print("processing observable {}...".format(obs_out)) # combine the different observables @@ -306,7 +326,7 @@ def main(): #rebin = [ float(i) for i in rebin ] #print('rebin = ', rebin) - task = Task_part(nx=nx, files=files, outfile=outfile, weights=qweights, columns=columns, rebin=rebin) + task = Task_part(nx=nx, files=files, outfile=outfile, weights=qweights, columns=columns, rebin=rebin, cumulant=obs_cum) # print(" > submitting {}".format(outfile)) parts_queue.put(task) @@ -318,12 +338,12 @@ def main(): parts_queue.join() # # now loop over renames & re-binnings -# for (obs_line, obs_in, obs_out, nx) in obs_generator(obs_list): +# for (obs_line, obs_in, obs_out, nx, obs_cum) in obs_generator(obs_list): # # rebin = config.get('Observables', obs_line, fallback=None) # # # skip cases which we already dealt with above -# if (rebin is None) and (obs_in == obs_out): +# if (rebin is None) and (obs_in == obs_out): # continue # # if rebin is not None: @@ -343,7 +363,7 @@ def main(): # hist.write_to_file(outfile) - # merge + # merge if config.has_section('Merge'): print(""" @@ -351,7 +371,7 @@ def main(): // Merge // /////////// """) - for (obs_line, obs_in, obs_out, nx) in obs_generator(obs_list): + for (obs_line, obs_in, obs_out, nx, obs_cum) in obs_generator(obs_list): print("processing observable {}...".format(obs_out)) for mrg in config.options('Merge'): parts = config.get('Merge', mrg) @@ -375,6 +395,10 @@ def main(): ptfile = outdir_parts + '/' + mappt + '.' + obs_out + '.dat' hist = hist + NNLOJETHistogram(nx=nx, filename=ptfile, weights=qweights) hist.write_to_file(outfile) + # debug plots + if qplot: + with open(outdir_plots + '/' + mrg + '.' + obs_out + '.plus.plt', 'wt') as fout: + print(nnlojet_plot.plot_merge_plus(path='../Parts', obs=obs_out, merge=mrg, parts=' '.join(parts)), file=fout) elif "|" in parts: parts = list(map(str.strip, parts.split('|'))) @@ -385,7 +409,7 @@ def main(): ptfile = outdir_parts + '/' + mappt + '.' + obs_out + '.dat' hist = hist.overwrite(NNLOJETHistogram(nx=nx, filename=ptfile, weights=qweights)) hist.write_to_file(outfile) - + elif "&" in parts: parts = list(map(str.strip, parts.split('&'))) print(" > Merge[&]: {} = {}...".format(mrg, parts)) @@ -402,12 +426,10 @@ def main(): # weighted average hist = container.merge(weighted=True) hist.write_to_file(outfile) - # # debug plots - # if qplot: - # reldir_parts = '../Parts' - # with open(outdir_plots + '/' + mrg + '.' + obs_out + '.plt', 'wt') as fout: - # print(gnuplot_header(mrg + '.' + obs_out + '.pdf'), file=fout) - # print(gnuplot_footer(), file=fout) + # debug plots + if qplot: + with open(outdir_plots + '/' + mrg + '.' + obs_out + '.ratio.plt', 'wt') as fout: + print(nnlojet_plot.plot_merge_and(path='../Parts', obs=obs_out, merge=mrg, parts=' '.join(parts)), file=fout) else: raise ValueError("couldn't find '|', '&' nor '+' in the merge:", parts) @@ -419,7 +441,7 @@ def main(): // Final // /////////// """) - for (obs_line, obs_in, obs_out, nx) in obs_generator(obs_list): + for (obs_line, obs_in, obs_out, nx, obs_cum) in obs_generator(obs_list): print("processing observable {}...".format(obs_out)) # assemble final histograms for fin in config.options('Final'): @@ -433,9 +455,9 @@ def main(): hist.write_to_file(outfile) # write to the file if qweights: - with open(outdir_final + '/' + fin + '.' + obs_out + '.APPLfast.txt', 'wt') as fout: - print(hist.to_APPLfast_weight(), file=fout) - + with open(outdir_final + '/' + fin + '.' + obs_out + '.weights.txt', 'wt') as fout: + print(hist.to_weights(), file=fout) + if __name__ == "__main__": main() diff --git a/tools/nnlojet/nnlojet_algo.py b/tools/nnlojet/nnlojet_algo.py index c9cae8cb..04c3d439 100755 --- a/tools/nnlojet/nnlojet_algo.py +++ b/tools/nnlojet/nnlojet_algo.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 import numpy as np +import math + +# import matplotlib.pyplot as plt def is_outlier_MAD(points, thresh=3.5): @@ -17,22 +20,57 @@ def is_outlier_MAD(points, thresh=3.5): return modified_z_score > thresh -def is_outlier_doubleMAD(points, thresh=3.5): +def is_outlier_doubleMAD(points, thresh=3.5, qplt=False): + #> maybe even want to swith to the "Harrell-Davis quantile estimator" in the future? median = np.nanmedian(points) abs_diff = np.abs(points - median) - left_mad = np.nanmedian(abs_diff[points <= median]) right_mad = np.nanmedian(abs_diff[points >= median]) - if left_mad == 0. or right_mad == 0.: + # means all entries are a constant (propbably zero) modified_z_score = 0. * abs_diff - - else: - y_mad = left_mad * np.ones(len(points)) + else: + y_mad = np.zeros(len(points)) + y_mad[points < median] = left_mad y_mad[points > median] = right_mad + y_mad[points == median] = 0.5* (left_mad+right_mad) modified_z_score = 0.6745 * abs_diff / y_mad - modified_z_score[points == median] = 0. + + # if qplt: + # print(">>> doubleMAD [ {} | {} | {} ]".format(left_mad,median,right_mad)) + # nbins = 50 + # range = [ min(points), max(points) ] + # plt.hist(points, range=range, bins=nbins, log=True, label='all') + # plt.hist(points[modified_z_score > thresh], range=range, bins=nbins, log=True, label='trim') + # plt.show() + + return modified_z_score > thresh + + +def is_outlier_dynMAD(points, thresh=3.5, dsigma=0.6745, qplt=False): + frac = math.erf(dsigma / math.sqrt(2.)) + #print(">>> dsigma = {}; frac = {}".format(dsigma, frac)) + low, med, upp = np.nanquantile(points, + [0.5 * (1. - frac), 0.5, 0.5 * (1. + frac)], + method="normal_unbiased") + if low == med or upp == med or med == np.nan: + # means all entries are a constant (propbably zero) + modified_z_score = np.zeros(len(points)) + else: + y_mad = np.zeros(len(points)) + y_mad[points < med] = abs(med - low) # left_mad + y_mad[points > med] = abs(upp - med) # right_mad + y_mad[points == med] = 0.5 * abs(upp - low) + modified_z_score = dsigma * np.abs(points - med) / y_mad + + # if qplt: + # print(">>> dyn MAD [ {} | {} | {} ]".format(left_mad,median,right_mad)) + # nbins = 50 + # range = [ min(points), max(points) ] + # plt.hist(points, range=range, bins=nbins, log=True, label='all') + # plt.hist(points[modified_z_score > thresh], range=range, bins=nbins, log=True, label='trim') + # plt.show() return modified_z_score > thresh @@ -52,3 +90,6 @@ def is_outlier_IQR(points, thresh=1.5): # return (points < lower_fence) | (points > upper_fence) return np.array([ (pt < lower_fence) | (pt > upper_fence) if np.isfinite(pt) else False for pt in points ]) + + + diff --git a/tools/nnlojet/nnlojet_plot.py b/tools/nnlojet/nnlojet_plot.py new file mode 100755 index 00000000..ba19bdcc --- /dev/null +++ b/tools/nnlojet/nnlojet_plot.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + + +def plot_merge_and(**kwargs): + gnuplot = r''' +set terminal pdfcairo enhanced color size 10cm,10cm dashed # font "Iosevka,7" fontscale 0.65 +set encoding utf8 +set style fill transparent pattern border +set pointintervalbox 0.001 +''' + #> insert all the info into the template + gnuplot += r''' +PATH = '{path}' +OBS = '{obs}' +MERGE = '{merge}' +PARTS = '{parts}' +'''.format(**kwargs) + gnuplot += r''' +merged = PATH.'/'.MERGE.'.'.OBS.'.dat' + +#> parse the merged histogram file to excract some information on the data structure we're dealing with +eval system("awk 'BEGIN{bin_acc=0.;bin_num=-1;bin_lst=0.;}$1!~/^#/{bin_num++;if(bin_num>0){bin_acc+=($3-$1)/bin_lst;}bin_lst=$3-$1;}$1~/^#labels:/{printf(\"ncol = %d;\", gensub(/.+\\[([0-9]+)\\]$/, \"\\\\1\", \"g\", $(NF)));}$1~/^#nx:/{printf(\"nx = %d;\", $2);}END{printf(\"bin_fac = %e;\",bin_acc/bin_num);}' ".merged) +#> only plots for distributions +if (nx != 3) quit; + +label(i) = system(sprintf("awk '$1~/^#labels:/{print $(%d)}' ",i).merged) # comment prefix causes one offset + +#> (x-a)/(x+a) & only take into account error from `x`, values in ‰ +ratio(part) = "< paste ".merged." ".part." | ".sprintf("awk 'BEGIN{nx=%d;ncol=%d;}$1!~/^#/&&$1==$(ncol+1)&&$(nx)==$(ncol+nx){for(i=1;i<=nx;i++){printf(\"%e \",$(i))}for(i=nx+1;i<ncol;i+=2){j=ncol+i;x=$(j);a=$(i);if(x==0.&&a==0.){val=0.;err=0.;}else{val=(x-a)/(x+a);err=$(j+1)*2.*a/(x+a)**2};printf(\" %e %e \",val*1e3,err*1e3);}printf(\"\\n\");}' 2> /dev/null", nx,ncol) +#> (x-a)/Δa & only take into account error from `x` +dev(part) = "< paste ".merged." ".part." | ".sprintf("awk 'BEGIN{nx=%d;ncol=%d;}$1!~/^#/&&$1==$(ncol+1)&&$(nx)==$(ncol+nx){for(i=1;i<=nx;i++){printf(\"%e \",$(i))}for(i=nx+1;i<ncol;i+=2){j=ncol+i;x=$(j);a=$(i);if(x==0.&&a==0.){val=0.;err=0.;}else{val=(x-a)/$(i+1);err=$(j+1)/$(i+1)};printf(\" %e %e \",val,err);}printf(\"\\n\");}' 2> /dev/null", nx,ncol) + +set output MERGE.'.'.OBS.'.pdf' + +if (bin_fac > 1.1) { + set log x + bin_loc(xlow,xupp,pos) = exp(log(xlow) + pos*(log(xupp)-log(xlow))) +} else { + unset log x + bin_loc(xlow,xupp,pos) = xlow + pos*(xupp-xlow) +} +set format y '%g' + +set lmargin 10 +set key top left horizontal +unset colorbox # remove palette box + +#> @TODO: injection point of custom commands? + +do for [t = 1:(ncol-nx)/2] { + col = nx+2*t-1; + #set title label(col+1) noenhanced + set multiplot layout 2,1 title label(col+1) noenhanced + set ylabel '(X − ref) / (X + ref) [‰]' noenhanced + set format x '' + unset xlabel + plot \ + ratio(PATH.'/'.MERGE.'.'.OBS.'.dat')." | awk '$1!~/^#/{print $0;$1=$3;print $0;}'" u 1:(column(col)+column(col+1)):(column(col)-column(col+1)) w filledc lc rgbcolor 0 fs transparent solid 0.3 t MERGE noenhanced, \ + for [i=1:words(PARTS)] ratio(PATH.'/'.word(PARTS, i).'.'.OBS.'.dat') u (bin_loc($1,$3,0.25+i*0.5/(words(PARTS)+1.))):col:(column(col+1)) w err lc palette frac i/(words(PARTS)+1.) pt i ps 0.2 t word(PARTS, i) noenhanced + set ylabel '(X − ref) / Δref' noenhanced + set format x '%g' + set xlabel OBS noenhanced + plot \ + dev(PATH.'/'.MERGE.'.'.OBS.'.dat')." | awk '$1!~/^#/{print $0;$1=$3;print $0;}'" u 1:(column(col)+column(col+1)):(column(col)-column(col+1)) w filledc lc rgbcolor 0 fs transparent solid 0.3 notitle, \ + for [i=1:words(PARTS)] dev(PATH.'/'.word(PARTS, i).'.'.OBS.'.dat') u (bin_loc($1,$3,0.25+i*0.5/(words(PARTS)+1.))):col:(column(col+1)) w err lc palette frac i/(words(PARTS)+1.) pt i ps 0.2 notitle + unset multiplot +} + +unset output +''' + return gnuplot + + +def plot_merge_plus(**kwargs): + gnuplot = r''' +set terminal pdfcairo enhanced color size 10cm,10cm dashed # font "Iosevka,7" fontscale 0.65 +set encoding utf8 +set style fill transparent pattern border +set pointintervalbox 0.001 +''' + #> insert all the info into the template + gnuplot += r''' +PATH = '{path}' +OBS = '{obs}' +MERGE = '{merge}' +PARTS = '{parts}' +'''.format(**kwargs) + gnuplot += r''' +merged = PATH.'/'.MERGE.'.'.OBS.'.dat' + +#> parse the merged histogram file to excract some information on the data structure we're dealing with +eval system("awk 'BEGIN{bin_acc=0.;bin_num=-1;bin_lst=0.;}$1!~/^#/{bin_num++;if(bin_num>0){bin_acc+=($3-$1)/bin_lst;}bin_lst=$3-$1;}$1~/^#labels:/{printf(\"ncol = %d;\", gensub(/.+\\[([0-9]+)\\]$/, \"\\\\1\", \"g\", $(NF)));}$1~/^#nx:/{printf(\"nx = %d;\", $2);}END{printf(\"bin_fac = %e;\",bin_acc/bin_num);}' ".merged) +#> only plots for distributions +if (nx != 3) quit; + +label(i) = system(sprintf("awk '$1~/^#labels:/{print $(%d)}' ",i).merged) # comment prefix causes one offset + +#> x/a & only take into account error from `x`, values in % +ratio(part) = "< paste ".merged." ".part." | ".sprintf("awk 'BEGIN{nx=%d;ncol=%d;}$1!~/^#/&&$1==$(ncol+1)&&$(nx)==$(ncol+nx){for(i=1;i<=nx;i++){printf(\"%e \",$(i))}for(i=nx+1;i<ncol;i+=2){j=ncol+i;x=$(j);a=$(i);if(x==0.&&a==0.){val=0.;err=0.;}else{val=x/a;err=$(j+1)/a};printf(\" %e %e \",val*1e2,err*1e2);}printf(\"\\n\");}' 2> /dev/null", nx,ncol) + +set output MERGE.'.'.OBS.'.pdf' + +if (bin_fac > 1.1) { + set log x + bin_loc(xlow,xupp,pos) = exp(log(xlow) + pos*(log(xupp)-log(xlow))) +} else { + unset log x + bin_loc(xlow,xupp,pos) = xlow + pos*(xupp-xlow) +} +set format y '%g' + +set lmargin 10 +set key top left horizontal +unset colorbox # remove palette box + +#> @TODO: injection point of custom commands + +do for [t = 1:(ncol-nx)/2] { + col = nx+2*t-1; + #set title label(col+1) noenhanced + set multiplot layout 2,1 title label(col+1) noenhanced + set ylabel 'Δ' noenhanced + set format x '' + unset xlabel + set log y + plot \ + "< awk '$1!~/^#/{print $0;$1=$3;print $0;}' ".merged u 1:(column(col+1)) w l lc rgbcolor 0 t MERGE noenhanced, \ + for [i=1:words(PARTS)] PATH.'/'.word(PARTS, i).'.'.OBS.'.dat' u (bin_loc($1,$3,0.5)):(column(col+1)) w l lc palette frac i/(words(PARTS)+1.) dt i t word(PARTS, i) noenhanced + set ylabel 'X / ref [%]' noenhanced + set format x '%g' + set xlabel OBS noenhanced + unset log y + plot \ + ratio(PATH.'/'.MERGE.'.'.OBS.'.dat')." | awk '$1!~/^#/{print $0;$1=$3;print $0;}'" u 1:(column(col)+column(col+1)):(column(col)-column(col+1)) w filledc lc rgbcolor 0 fs transparent solid 0.3 notitle, \ + for [i=1:words(PARTS)] ratio(PATH.'/'.word(PARTS, i).'.'.OBS.'.dat') u (bin_loc($1,$3,0.25+i*0.5/(words(PARTS)+1.))):col:(column(col+1)) w err lc palette frac i/(words(PARTS)+1.) pt i ps 0.2 notitle + unset multiplot +} + +unset output +''' + return gnuplot + + diff --git a/tools/nnlojet/nnlojet_util.py b/tools/nnlojet/nnlojet_util.py index 43bfa858..15628784 100755 --- a/tools/nnlojet/nnlojet_util.py +++ b/tools/nnlojet/nnlojet_util.py @@ -37,9 +37,10 @@ class NNLOJETHistogram(): rebin = kwargs.get('rebin', None) if rebin is not None and self.nx <= 0: raise ValueError('rebinning requires x-columns!') - self._read_dat(columns=columns, rebin=rebin) + cumulant = kwargs.get('cumulant', None) + self._read_dat(columns=columns, rebin=rebin, cumulant=cumulant) - # to keep track of weights + #> to keep track of weights self._files_wgt = None # list of the files self._wgt = None if 'weights' in kwargs and kwargs.get('weights', False): @@ -72,7 +73,7 @@ class NNLOJETHistogram(): return NNLOJETHistogram.filename_suff(self.filename) - # without the setter, we do not allow setting the value after init + #> without the setter, we do not allow setting the value after init # @nx.setter # def nx(self, nx): self._nx = nx @@ -81,8 +82,9 @@ class NNLOJETHistogram(): columns = kwargs.get('columns', None) rebin = kwargs.get('rebin', None) + cumulant = kwargs.get('cumulant', None) - # step 1: parse label line + #> step 1: parse label line with open(self.filename, 'rt') as histfile: lines = histfile.readlines() @@ -92,21 +94,22 @@ class NNLOJETHistogram(): self._labels = [ re.sub(r'\[[0-9]+\]', '', lab) for lab in line.split()[1:] ] # remove "#labels" part # break if re.match(r'^\s*' + _comment_prefix + 'nx', line, re.IGNORECASE): + #> nx comment line overrides defaults set by filename self._nx = int(line.split()[1]) # remove "#nx" part # print("using nx = {}".format(self._nx)) # fh.close() - # warning if no labels line could be found + #> warning if no labels line could be found if self._labels is None: logging.warning("WARNING: couldn't find a labels line {}".format(self.filename)) return - # default init + #> default init self._ioverflow = None nrows = 0 ncols = len(self._labels) # raw column size - # build index list (only y-columns; no x-val) + #> build index list (only y-columns; no x-val) if columns is not None: idx_cols = [] for col in columns: @@ -123,8 +126,8 @@ class NNLOJETHistogram(): else: idx_cols = range(self.nx, ncols, 2) - # step 2: read in the data - comment_pattern = re.compile(r'^\s*' + _comment_prefix) + #> step 2: read in the data + comment_pattern = re.compile(r'^\s*' + _comment_prefix) # dynamic arrays: python lists so much faster than numpy arrays data_xval = [] if self.nx > 0 else None @@ -135,10 +138,10 @@ class NNLOJETHistogram(): for line in lines: #----- parse comments if re.match(comment_pattern, line): - # neval + #> neval if re.match(r'^\s*' + _comment_prefix + 'neval', line, re.IGNORECASE): self._neval = int(line.split()[1]) - # overflow + #> overflow if re.match(r'^\s*' + _comment_prefix + 'overflow', line, re.IGNORECASE): data = line.split() # make a list if len(data) != ncols: @@ -161,13 +164,13 @@ class NNLOJETHistogram(): nrows += 1 # fh.close() - # set neval to 1 if there was none + #> set neval to 1 if there was none if self._neval is None: raise ValueError("couldn't find the neval information") print("couldn't find the neval info: {}".format(self.filename), file=sys.stderr) self._neval = 1 - - # step 3: rebin + + #> step 3: rebin if rebin is not None: if self.nx != 3: raise ValueError('rebinning can only handle nx=3 for now') @@ -179,36 +182,42 @@ class NNLOJETHistogram(): i_rebin = 0 i_overflow = None for i_row in range(nrows): - # overflow is added immediately + #> overflow is added immediately if i_row == self._ioverflow: rebin_xval.append(data_xval[i_row]) rebin_yval.append(data_yval[i_row]) rebin_yerr.append(data_yerr[i_row]) i_overflow = i_rebin + #> in order to merge with "left-over" bins, need to accumulate things properly + for i_col in range(len(rebin_yval[i_overflow])): + rebin_yval[i_overflow][i_col] = self._neval * data_yval[i_row][i_col] + rebin_yerr[i_overflow][i_col] = (self._neval * data_yerr[i_row][i_col])**2 + self._neval * (data_yval[i_row][i_col])**2 #> do *not* increment! otherwise, we lose a bin #i_rebin += 1 continue # # debug # print("data_xval[{}][0] = ".format(i_row), type(data_xval[i_row][0]), data_xval[i_row][0]) # print("rebin[i_rebin] = ".format(i_rebin), type(rebin[i_rebin]), rebin[i_rebin]) - # find the first matching bin - if data_xval[i_row][0] < rebin[i_rebin] and data_xval[i_row][2] <= rebin[i_rebin]: - continue - # skip trailing bins - if i_rebin == len(rebin)-1: + # find the first matching bin or deal with the trailing bins + if (data_xval[i_row][0] < rebin[i_rebin] and data_xval[i_row][2] <= rebin[i_rebin]) or (i_rebin >= len(rebin)-1): + #> these are "left-over" bins and should be accumulated into the overflow + delta = data_xval[i_row][2] - data_xval[i_row][0] + for i_col in range(len(rebin_yval[i_overflow])): + rebin_yval[i_overflow][i_col] += self._neval * delta*data_yval[i_row][i_col] + rebin_yerr[i_overflow][i_col] += (self._neval * delta*data_yerr[i_row][i_col])**2 + self._neval * (delta*data_yval[i_row][i_col])**2 continue - # init + #> init if xlow is None: rebin_row_val = [0.] * len(data_yval[i_row]) rebin_row_err = [0.] * len(data_yval[i_row]) xlow = data_xval[i_row][0] - # accumulate data + #> accumulate data if data_xval[i_row][0] >= rebin[i_rebin] and data_xval[i_row][2] <= rebin[i_rebin+1]: delta = data_xval[i_row][2] - data_xval[i_row][0] for i_col in range(len(rebin_row_val)): rebin_row_val[i_col] += self._neval * delta*data_yval[i_row][i_col] rebin_row_err[i_col] += (self._neval * delta*data_yerr[i_row][i_col])**2 + self._neval * (delta*data_yval[i_row][i_col])**2 - # finalise + #> finalise row if i_row == nrows-1 or data_xval[i_row+1][0] >= rebin[i_rebin+1]: xupp = data_xval[i_row][2] delta = xupp - xlow @@ -218,33 +227,84 @@ class NNLOJETHistogram(): for i_col in range(len(rebin_row_val)): rebin_row_val[i_col] /= self._neval rebin_row_err[i_col] = math.sqrt( rebin_row_err[i_col] - self._neval * rebin_row_val[i_col]**2 ) / self._neval - # differential: + #> differential: rebin_row_val[i_col] /= delta rebin_row_err[i_col] /= delta rebin_yval.append(rebin_row_val) rebin_yerr.append(rebin_row_err) i_rebin += 1 - # reset accumulators + #> reset accumulators rebin_row_val, rebin_row_err = None, None xlow, xupp = None, None continue + # finalise the overflow bin + for i_col in range(len(rebin_yval[i_overflow])): + rebin_yval[i_overflow][i_col] /= self._neval + rebin_yerr[i_overflow][i_col] = math.sqrt( rebin_yerr[i_overflow][i_col] - self._neval * rebin_yval[i_overflow][i_col]**2 ) / self._neval self._ioverflow = i_overflow data_xval = rebin_xval data_yval = rebin_yval data_yerr = rebin_yerr - # step 4: store to member vars as numpy arrays + #> step 4: cumulant option + if cumulant is not None: + if self.nx != 3: + raise ValueError('cumulant can only handle nx=3 for now') + if (cumulant == '+') or (cumulant == '++'): + cum_range = reversed(range(len(data_yval))) + cum_xval = [[data_xval[i_row][0]] for i_row in range(len(data_yval))] + if self._labels is not None: + self._labels.pop(1) + self._labels.pop(1) + elif (cumulant == '-') or (cumulant == '--'): + cum_range = range(len(data_yval)) + cum_xval = [[data_xval[i_row][2]] for i_row in range(len(data_yval))] + if self._labels is not None: + self._labels.pop(0) + self._labels.pop(0) + else: + raise ValueError('cumulant label invalid: {}'.format(cumulant)) + #> cumulants only have one x-value + self._nx = 1 + #> the accumulator & new x-values + cum_row_val = [0.] * len(data_yval[0]) + cum_row_err = [0.] * len(data_yval[0]) + #> deal with the overflow first + if (cumulant == '++') or (cumulant == '--'): + i_overflow = self._ioverflow + for i_col in range(len(data_yval[i_overflow])): + cum_row_val[i_col] += self._neval * data_yval[i_overflow][i_col] + cum_row_err[i_col] += (self._neval * data_yerr[i_overflow][i_col])**2 + self._neval * (data_yval[i_overflow][i_col])**2 + #> used up => reset + data_yval[i_overflow][i_col] = 0. + data_yerr[i_overflow][i_col] = 0. + #> sum up the bins with the order determined above + for i_row in cum_range: + if i_row == self._ioverflow: + continue + delta = data_xval[i_row][2] - data_xval[i_row][0] + for i_col in range(len(data_yval[i_row])): + cum_row_val[i_col] += self._neval * delta*data_yval[i_row][i_col] + cum_row_err[i_col] += (self._neval * delta*data_yerr[i_row][i_col])**2 + self._neval * (delta*data_yval[i_row][i_col])**2 + #> overwrite + data_yval[i_row][i_col] = cum_row_val[i_col] / self._neval + data_yerr[i_row][i_col] = math.sqrt( cum_row_err[i_col] - cum_row_val[i_col]**2 / self._neval ) / self._neval + #> cumulants only have one x-value: overwrite + data_xval = cum_xval + self._nx = 1 + + #> step 5: store to member vars as numpy arrays if data_xval is not None: - self._xval = np.array(data_xval, dtype='float') - self._yval = np.array(data_yval, dtype='float') - self._yerr = np.array(data_yerr, dtype='float') + self._xval = np.array(data_xval, dtype='float') + self._yval = np.array(data_yval, dtype='float') + self._yerr = np.array(data_yerr, dtype='float') - # warning if all entries are zero + #> warning if all entries are zero if (not np.any(self._yval)) and (not np.any(self._yerr)): logging.warning("WARNING: all entries zero for {}".format(self.filename)) - # warning if nan or inf + #> warning if nan or inf if np.any(np.logical_not(np.isfinite(self._yval))) or np.any(np.logical_not(np.isfinite(self._yerr))): logging.warning("WARNING: NaN or Inf encountered in {}".format(self.filename)) @@ -313,7 +373,7 @@ class NNLOJETHistogram(): return '\n'.join(lines) - def to_APPLfast_weight(self): + def to_weights(self): # construct string line by line lines = [] @@ -494,7 +554,7 @@ class NNLOJETContainer(): self._mask = None # it's much more efficient pre-allocating a sufficiently - # big array, instead of dynamically growingit + # big array, instead of dynamically adjusting it self._buffer_size = kwargs.get('size', None) self._size = 0 @@ -545,7 +605,7 @@ class NNLOJETContainer(): self._yerr = np.zeros((ncols, nrows, self._buffer_size), dtype='float') self._mask = np.ones((ncols, nrows, self._buffer_size), dtype='int') self._mask *= 2 # means invalid - # copy over + # copy over if self._size != 0: raise ValueError('first append and non-zero size?!') self._yval[:, :, self._size] = hist._yval[:, :] @@ -579,7 +639,7 @@ class NNLOJETContainer(): self._yerr[:, :, self._size] = hist._yerr[:, :] self._mask[:, :, self._size] = np.logical_or(np.logical_not(np.isfinite(hist._yval[:, :])), np.logical_not(np.isfinite(hist._yerr[:, :]))).astype(int) * 2 # self._mask[:, :, self._size] = np.isnan(hist._yerr[:, :]).astype(int) * 2 - self._size += 1 + self._size += 1 else: # the inefficient implementation... self._yval = np.append(self._yval, np.expand_dims(hist._yval, axis=2), axis=2) @@ -593,7 +653,7 @@ class NNLOJETContainer(): def _recursive_k_weights(self, i_row, i_col, wgts_in): - n_files = len(wgts_in) + n_files = len(wgts_in) wgts_out = [0.] * n_files leafs = [ [] for i in range(n_files) ] @@ -642,7 +702,7 @@ class NNLOJETContainer(): # def _recursive_k_weights(self, i_row, i_col, wgts_in): # wgts_out = np.zeros(wgts_in.shape) # n_files = len(wgts_in) -# +# # # recursively search where the runs have been merged into # # do this once at the beginning to save time (one inner loop) # targets = - np.ones(wgts_in.shape) @@ -657,10 +717,10 @@ class NNLOJETContainer(): # if self._mask[i_row, i_col, target] > 0: # raise ValueError('leaf terminating on a trimmed entry?!') # targets[i_file] = target -# +# # for i_file in range(n_files): # # find all the leafs that were merged into current `i_file` -# i_leafs = set(np.where(targets == i_file)[0]) +# i_leafs = set(np.where(targets == i_file)[0]) # # compute ntot needed for the unweighted partial weight # ntot = 0 # for i_leaf in i_leafs: @@ -668,7 +728,7 @@ class NNLOJETContainer(): # # take the weight in `i_file` and distribute them # for i_leaf in i_leafs: # wgts_out[i_leaf] = wgts_in[i_file] * self._neval[i_leaf] / ntot -# +# # return wgts_out @@ -694,7 +754,7 @@ class NNLOJETContainer(): if ntot != 0: yval /= ntot - if self._qwgt and not skip_wgt: + if self._qwgt and not skip_wgt: wgts /= ntot wgts = self._recursive_k_weights(i_row, i_col, wgts) yerr = math.sqrt( yerr - ntot * yval**2 ) / ntot @@ -765,18 +825,18 @@ class NNLOJETContainer(): for i_col in range(n_cols): if weighted: - ( result._yval[i_row, i_col], - result._yerr[i_row, i_col], + ( result._yval[i_row, i_col], + result._yerr[i_row, i_col], wgts ) = self._merge_weighted_bin(i_row, i_col) else: - ( result._yval[i_row, i_col], - result._yerr[i_row, i_col], + ( result._yval[i_row, i_col], + result._yerr[i_row, i_col], wgts ) = self._merge_bin(i_row, i_col) if self._qwgt: result._wgt[i_row, i_col, :] = wgts[:] - + return result @@ -790,7 +850,7 @@ class NNLOJETContainer(): # self._buffer_size[:, :, self._size:] = 1 - def mask_outliers(self, thresh=4., maxfrac=0.05): + def mask_outliers(self, thresh=3.5, maxfrac=0.01): if thresh <= 0.: raise ValueError("non-positive threshold value: {}".format(thresh)) if maxfrac <= 0. or maxfrac >= 1.: @@ -799,40 +859,125 @@ class NNLOJETContainer(): self.unmask() # what about running this *after* optimise_k ?! (mask[i,j,k] < 0) + # -> probably should avoid it as neval information is not saved after k-opt (n_rows, n_cols, n_files) = self._yval.shape if self._buffer_size is not None: n_files = self._size + #> leave room for one outlier + if n_files > 1: + maxfrac = max(maxfrac, 1.5/n_files) + + # # for an interval [med-fac*sigma, med+fac*sigma] of a normal distribution + # # the fraction of accepted jobs is: erf(fac/sqrt(2)) + # # we will match the l-estimator at 1/2 of the threshold (to have something dynamical) + # fac = thresh/2. + # frac = math.erf(fac/math.sqrt(2.)) + # # match where we can accommodate an outlier + # while (1.-frac < 1./n_files) and (frac > 0.5): + # fac /= 1.1 + # frac = math.erf(fac/math.sqrt(2.)) + # # want to have at least the median version (= double mad) + # if frac < 0.5: + # fac = 0.6745 + # frac = 0.5 + # print(">>> fac = {}; frac = {}; 1/n = {}".format(fac, frac, 1./n_files)) + + # for an interval [med-fac*sigma, med+fac*sigma] of a normal distribution + # the fraction of accepted jobs is: erf(fac/sqrt(2)) + fac = thresh + frac = math.erf(fac/math.sqrt(2.)) + # match where we can accommodate outliers + while (1.-frac < maxfrac): + fac /= 1.05 + frac = math.erf(fac/math.sqrt(2.)) + #> done: now match at a lower point but don't go lower than the median + fac = max(0.5*fac, 0.6745) + frac = math.erf(fac/math.sqrt(2.)) + # print(">>> fac = {}; frac = {}; 1/N = {}".format(fac, frac, 1./n_files)) + + bin_data = np.zeros(n_files, dtype='float') # single bin data for i_row in range(n_rows): for i_col in range(n_cols): + #> copy data & mask with nan where necessary + bin_data = np.where( + np.logical_and(self._yerr[i_row, i_col, :] != 0, + self._mask[i_row, i_col, :] == 0), + self._yval[i_row, i_col, :], np.nan) + if np.all(np.isnan(bin_data)): + continue + # print("+++++++++ ({},{})".format(i_row, i_col)) + # print("mask: {}".format(self._mask[i_row, i_col, :])) + # print("yerr: {}".format(self._yerr[i_row, i_col, :])) + # print("yval: {}".format(self._yval[i_row, i_col, :])) + # print("data: {}".format(bin_data)) + # print("+++++++++") dyn_thresh = thresh - # mask = nnlojet_algo.is_outlier_doubleMAD(self._yval[i_row, i_col, :], dyn_thresh).astype(int) - mask = nnlojet_algo.is_outlier_IQR(self._yval[i_row, i_col, :], dyn_thresh).astype(int) + dyn_fac = fac + # mask = nnlojet_algo.is_outlier_doubleMAD(bin_data, dyn_thresh) + # mask = nnlojet_algo.is_outlier_IQR(bin_data, dyn_thresh) + mask = nnlojet_algo.is_outlier_dynMAD(bin_data, dyn_thresh, dyn_fac) cutfrac = np.count_nonzero(mask) / n_files - # print("#", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) - while cutfrac > maxfrac: + #zerofrac = np.count_nonzero(self._yerr[i_row, i_col, :] == 0.) / n_files + #print("#", i_row, i_col, dyn_thresh, cutfrac, maxfrac, zerofrac, file=sys.stderr) + while (cutfrac > maxfrac) and (dyn_fac < 2.*fac): dyn_thresh *= 1.1 - if dyn_thresh > 100.: - # unmask this bin for safety? - break - # mask = nnlojet_algo.is_outlier_doubleMAD(self._yval[i_row, i_col, :], dyn_thresh).astype(int) - mask = nnlojet_algo.is_outlier_IQR(self._yval[i_row, i_col, :], dyn_thresh).astype(int) + dyn_fac *= 1.05 + # mask = nnlojet_algo.is_outlier_doubleMAD(bin_data, dyn_thresh) + # mask = nnlojet_algo.is_outlier_IQR(bin_data, dyn_thresh) + mask = nnlojet_algo.is_outlier_dynMAD(bin_data, dyn_thresh, dyn_fac) cutfrac = np.count_nonzero(mask) / n_files - # print(">", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) - # print("#", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) + # print(">", i_row, i_col, dyn_thresh, dyn_fac, cutfrac, maxfrac, file=sys.stderr) + # print("#", i_row, i_col, dyn_thresh, dyn_fac, cutfrac, maxfrac, file=sys.stderr) + # mask = nnlojet_algo.is_outlier_dynMAD(bin_data, dyn_thresh, dyn_fac, True) + # print("out: {}".format(mask)) + # print("+++++++++") self._mask[i_row, i_col, mask == True] = 1 +# def mask_outliers(self, thresh=4., maxfrac=0.05): +# if thresh <= 0.: +# raise ValueError("non-positive threshold value: {}".format(thresh)) +# if maxfrac <= 0. or maxfrac >= 1.: +# raise ValueError("invalid maxfrac range: {}".format(maxfrac)) +# +# self.unmask() +# +# # what about running this *after* optimise_k ?! (mask[i,j,k] < 0) +# +# (n_rows, n_cols, n_files) = self._yval.shape +# if self._buffer_size is not None: +# n_files = self._size +# +# for i_row in range(n_rows): +# for i_col in range(n_cols): +# dyn_thresh = thresh +# mask = nnlojet_algo.is_outlier_doubleMAD(self._yval[i_row, i_col, :], dyn_thresh).astype(int) +# # mask = nnlojet_algo.is_outlier_IQR(self._yval[i_row, i_col, :], dyn_thresh).astype(int) +# cutfrac = np.count_nonzero(mask) / n_files +# # print("#", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) +# while cutfrac > maxfrac: +# dyn_thresh *= 1.1 +# mask = nnlojet_algo.is_outlier_doubleMAD(self._yval[i_row, i_col, :], dyn_thresh).astype(int) +# # mask = nnlojet_algo.is_outlier_IQR(self._yval[i_row, i_col, :], dyn_thresh).astype(int) +# # if dyn_thresh > 100.: +# # # unmask this bin for safety? +# # break +# cutfrac = np.count_nonzero(mask) / n_files +# # print(">", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) +# # print("#", i_row, i_col, dyn_thresh, cutfrac, maxfrac, file=sys.stderr) +# self._mask[i_row, i_col, mask == True] = 1 + def optimise_k(self, **kwargs): # parse arguments for the merge settings maxdev_unwgt = kwargs.get('maxdev_unwgt', None) maxdev_steps = kwargs.get('maxdev_steps', None) - nsteps = kwargs.get('nsteps', None) + nsteps = kwargs.get('nsteps', 0) # set up params - if nsteps is None: maxdev_steps = None + if nsteps == 0: maxdev_steps = None if maxdev_unwgt is None and maxdev_steps is None: return # raise ValueError("optimise_k termination condition missing") @@ -883,6 +1028,7 @@ class NNLOJETContainer(): for jstep in range(istep-1,-nsteps-1,-1): delta = abs(history[istep][0] - history[jstep][0]) sigma = math.sqrt(history[istep][1]**2 + history[jstep][1]**2) + # sigma = max(abs(history[istep][1]-history[jstep][1]),min(history[istep][1],history[jstep][1])) term_cond2 = term_cond2 and delta < maxdev_steps * sigma # --- termination of merges --- @@ -917,7 +1063,7 @@ class NNLOJETContainer(): # skip invalid upper while iupp > 0: upp = sort_index[iupp] - if (self._mask[i_row, i_col, upp] == 0 and + if (self._mask[i_row, i_col, upp] == 0 and neval[upp] != 0): break iupp -= 1 -- GitLab