From d00fc229cc556ef33627b282d3fdbbd0984f60c4 Mon Sep 17 00:00:00 2001 From: Felix Metzner <felix.metzner@kit.edu> Date: Tue, 21 May 2024 18:10:08 +0200 Subject: [PATCH] Updating notebooks. --- .../preparation_for_pyhf_fit.ipynb | 474 ++++++ .../testing_fit_routine_rdstar.ipynb | 1385 +++++++++++++++-- .../sys_covariance_evaluation.ipynb | 2 +- .../sys_shape_effect_studies.ipynb | 1299 ++++++++++++++++ 4 files changed, 3072 insertions(+), 88 deletions(-) create mode 100644 rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/preparation_for_pyhf_fit.ipynb create mode 100644 rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_shape_effect_studies.ipynb diff --git a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/preparation_for_pyhf_fit.ipynb b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/preparation_for_pyhf_fit.ipynb new file mode 100644 index 000000000..b6dbe397d --- /dev/null +++ b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/preparation_for_pyhf_fit.ipynb @@ -0,0 +1,474 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6ed4ab75", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "242d4a5c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import copy\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from collections import defaultdict\n", + "from IPython.core.display import display, HTML\n", + "\n", + "pd.set_option('display.max_columns', 999)\n", + "\n", + "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85892d9f", + "metadata": {}, + "outputs": [], + "source": [ + 
"plt.switch_backend('module://ipykernel.pylab.backend_inline')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f028da2e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb02c532", + "metadata": {}, + "outputs": [], + "source": [ + "from sysvar.utils import read_yaml \n", + "from sysvar.fit_setup import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "173c96e2", + "metadata": {}, + "outputs": [], + "source": [ + "from sysvar.eigendecomposer import *\n", + "from sysvar.visualize import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e68113ea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b013d6b3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfc7c926", + "metadata": {}, + "outputs": [], + "source": [ + "columns = [\n", + " \"ROE_neextra__boCleanROE__bc\",\n", + " \"m2RecoilSignalSide_mbc_based_S1_CorrRes_with_AsymLaplaceSmear_RecoMode_after_2ndshift\",\n", + " \"__rdstar_fit_signal_component_id_with_reco_mode_info__\",\n", + " \"daughter__bo1__cm__spextraInfo__bodecayModeID__bc__bc\",\n", + " \"EXTENDED_SIG_ID\",\n", + " \"__tracking_systematic__\",\n", + " \"__weight_pid_corr_pi_ratio__\",\n", + " \"__weight_pid_corr_piFromK_ratio__\",\n", + " \"__weight_pid_corr_k_ratio__\",\n", + " \"__weight_pid_corr_kFromPi_ratio__\",\n", + " \"__weight_pid_corr_lepton_ratio__\",\n", + " \"__weight_slow_pi_ratio__\",\n", + " \"daughter__bo1__cm__spdaughter__bo0__cm__spdaughter__bo1__cm__spp__bc__bc__bc\",\n", + " \"daughter__bo1__cm__spdaughter__bo0__cm__spdaughter__bo1__cm__spPDG__bc__bc__bc\",\n", + " \"__weight_k_short_ratio__\",\n", + " \"__coulomb_corr_factor__\",\n", + " \"HAMMER_weight_Norm\",\n", + " \"HAMMER_weight_Signal\",\n", + " \"__weight_final_wHAMMER_wBF_corr__\",\n", + " 
\n", + " \"weight_ff_Dzerostar_nom\",\n", + " \"weight_ff_Dprimeone_nom\",\n", + " \"weight_ff_Done_nom\",\n", + " \"weight_ff_Dtwostar_nom\",\n", + " \"expNum\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac1adfcf", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d67eb22", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc36cdff", + "metadata": {}, + "outputs": [], + "source": [ + "hammer_signal_up = [f\"HAMMER_weight_Signal_up{x}\" for x in range(9)]\n", + "hammer_signal_down = [f\"HAMMER_weight_Signal_down{x}\" for x in range(9)]\n", + "hammer_norm_up = [f\"HAMMER_weight_Norm_up{x}\" for x in range(9)]\n", + "hammer_norm_down = [f\"HAMMER_weight_Norm_down{x}\" for x in range(9)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7914370", + "metadata": {}, + "outputs": [], + "source": [ + "columns.extend(hammer_signal_up)\n", + "columns.extend(hammer_signal_down)\n", + "columns.extend(hammer_norm_up)\n", + "columns.extend(hammer_norm_down)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4255b4cd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32c150e0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6ce1811", + "metadata": {}, + "outputs": [], + "source": [ + "dstst_columns = [\n", + " \n", + " \"weight_ff_Dzerostar_var_0_up\",\n", + " \"weight_ff_Dzerostar_var_0_down\",\n", + " \"weight_ff_Dzerostar_var_1_up\",\n", + " \"weight_ff_Dzerostar_var_1_down\",\n", + " \"weight_ff_Dzerostar_var_2_up\",\n", + " \"weight_ff_Dzerostar_var_2_down\",\n", + " \n", + " \"weight_ff_Dprimeone_var_0_up\",\n", + " \"weight_ff_Dprimeone_var_0_down\",\n", + " \"weight_ff_Dprimeone_var_1_up\",\n", + " 
\"weight_ff_Dprimeone_var_1_down\",\n", + " \"weight_ff_Dprimeone_var_2_up\",\n", + " \"weight_ff_Dprimeone_var_2_down\",\n", + " \n", + " \"weight_ff_Done_var_0_up\",\n", + " \"weight_ff_Done_var_0_down\",\n", + " \"weight_ff_Done_var_1_up\",\n", + " \"weight_ff_Done_var_1_down\",\n", + " \"weight_ff_Done_var_2_up\",\n", + " \"weight_ff_Done_var_2_down\",\n", + " \"weight_ff_Done_var_3_up\",\n", + " \"weight_ff_Done_var_3_down\",\n", + " \n", + " \"weight_ff_Dtwostar_var_0_up\",\n", + " \"weight_ff_Dtwostar_var_0_down\",\n", + " \"weight_ff_Dtwostar_var_1_up\",\n", + " \"weight_ff_Dtwostar_var_1_down\",\n", + " \"weight_ff_Dtwostar_var_2_up\",\n", + " \"weight_ff_Dtwostar_var_2_down\",\n", + " \"weight_ff_Dtwostar_var_3_up\",\n", + " \"weight_ff_Dtwostar_var_3_down\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa0e1f37", + "metadata": {}, + "outputs": [], + "source": [ + "columns.extend(dstst_columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "159e0044", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cea24504", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d608daf5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30267d0e", + "metadata": {}, + "outputs": [], + "source": [ + "base_path = \"/nfs/dust/belle2/user/metzner/rdstar_fit_infos/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a9e8f38", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_feather(\n", + " base_path + \"fit_dataframe_FitTest_Asimov_b24_BKGD2B2C2_with_sys_syshash0x1ff_V1Feb2024.feather\",\n", + " columns = columns\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88ce3dc1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "362e9922", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f26ff56", + "metadata": {}, + "outputs": [], + "source": [ + "sig_id_dict = {\n", + " \"continuum\": [901, 902],\n", + " \"other_BBbar\": [712, 711, 722, 721, 761, 781, 751, 771, 762, 782, 752, 772, 763, 783, 753, 773, 764, 784, 754, 774, 801, 802, 833, 831, 834, 832, 860, 822, 824, 821, 823, 811, 813, 812, 815, 814, 841, 844, 842, 845, 843, 846, 851, 854, 852, 855, 853, 856],\n", + " \"BtoDststEllNu\": [511, 521, 531, 541, 551, 561, 512, 522, 532, 542, 552, 562, 513, 523, 533, 543, 553, 563, 514, 524, 534, 544, 554, 564, 611, 621, 631, 641, 651, 661, 612, 622, 632, 642, 652, 662, 613, 623, 633, 643, 653, 663, 614, 624, 634, 644, 654, 664],\n", + " \"B0toDstTauNu\": [22, 26, 24, 28],\n", + " \"B0toDstEllNu\": [42, 46, 44, 48],\n", + " \"B0toDTauNu\": [12, 14],\n", + " \"B0toDEllNu\": [32, 34],\n", + " \"BptoDstTauNu\": [21, 25, 23, 27],\n", + " \"BptoDstEllNu\": [41, 45, 43, 47],\n", + " \"BptoDTauNu\": [11, 13],\n", + " \"BptoDEllNu\": [31, 33],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f014af29", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"fit_ctgy\"] = \"-1\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82baa869", + "metadata": {}, + "outputs": [], + "source": [ + "for name, ids in reversed(sig_id_dict.items()):\n", + " df.loc[df.EXTENDED_SIG_ID.isin(ids), \"fit_ctgy\"] = name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2803d52", + "metadata": {}, + "outputs": [], + "source": [ + "list(df[\"fit_ctgy\"].value_counts().keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e58e7a98", + "metadata": {}, + "outputs": [], + "source": [ + "df.__tracking_systematic__.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"96700c3a", + "metadata": {}, + "outputs": [], + "source": [ + "maps = {\n", + " 0.0035 : 1,\n", + " 0.0070 : 2,\n", + " 0.0105: 3,\n", + " 0.0140: 4,\n", + " 0.0175: 5,\n", + " 0.0210: 6,\n", + " 0.0245: 7\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "913010e0", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"tracking_ctgy\"] = -1\n", + "for value, key in maps.items():\n", + " df.loc[df[\"__tracking_systematic__\"] == value, \"tracking_ctgy\"] = key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e64ff3a8", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"tracking_weight\"] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c043592d", + "metadata": {}, + "outputs": [], + "source": [ + "settings =read_yaml(\"template_setup\", \"belle\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8914a899", + "metadata": {}, + "outputs": [], + "source": [ + "track_eigen = EigenDecomposer(df, settings, \"track_eff\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "595543a8", + "metadata": {}, + "outputs": [], + "source": [ + "track_eigen.save_nominal_templates()\n", + "track_eigen.precision = 0.01\n", + "track_eigen.find_important_eigendimension_indices()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c9388dc", + "metadata": {}, + "outputs": [], + "source": [ + "track_eigen.save_template_variations()" + ] + } + ], + "metadata": { + "git": { + "suppress_outputs": true + }, + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (Belle2)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No 
newline at end of file diff --git a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/testing_fit_routine_rdstar.ipynb b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/testing_fit_routine_rdstar.ipynb index 367e6eb32..6abb6a7e1 100644 --- a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/testing_fit_routine_rdstar.ipynb +++ b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/rdstar_fits/testing_fit_routine_rdstar.ipynb @@ -405,15 +405,1301 @@ "metadata": {}, "outputs": [], "source": [ - "fit_status.minuit_instance.params" + "fit_status.minuit_instance.errordef" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d44196b", + "metadata": {}, + "outputs": [], + "source": [ + "[c.name for c in fit_status.fitter_instance.fit_setup_info.reco_channels]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c62d0c5", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, "id": "6a31f7e0", "metadata": {}, "outputs": [], + "source": [ + "fit_status.minuit_instance.fixed[np.array([1,2,3])]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "813540bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b850c939", + "metadata": {}, + "outputs": [], + "source": [ + "fit_status.fitter_instance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d86eb3e", + "metadata": {}, + "outputs": [], + "source": [ + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.plotting_tools import SystematicsShapePlotter, ShapePlotInfoContainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a530a111", + "metadata": {}, + "outputs": [], + "source": [ + "from rdstar.offline_analysis.fitting.dedicated_fit_approach import rdstar_systematics as rdstar_sys" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "6c9dd4dd", + "metadata": {}, + "outputs": [], + "source": [ + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.systematics_manager.shape_sys_evaluater import FitSetupBinningPerRecoChInfo, UpDownAndDiffSystematicsDetails, SysColVarManager" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd71a4a1", + "metadata": {}, + "outputs": [], + "source": [ + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.fit_info_container import (\n", + " ComponentInfo,\n", + " FitSetupInfoContainer,\n", + " FitObservableInfo,\n", + " RecoChannelInfo,\n", + ")\n", + "\n", + "from typing import Tuple" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f86472e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c068b646", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daf232fc", + "metadata": {}, + "outputs": [], + "source": [ + "test_sys_col_manager = SysColVarManager(base_df=fit_status.fitter_instance._mc_df, fit_setup=fit_status.fitter_instance.fit_setup_info)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ed910ac", + "metadata": {}, + "outputs": [], + "source": [ + "bin_manager = test_sys_col_manager.get_bin_and_comp_mask_manager()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "452c8fc0", + "metadata": {}, + "outputs": [], + "source": [ + "bin_manager.binning_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0460334d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d889782", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3310335d", + "metadata": {}, + "outputs": [], + "source": [ + "def 
get_ordered_fit_observable_infos(\n", + " reco_ch_info: RecoChannelInfo,\n", + " invert_obs_order: bool,\n", + ") -> Tuple[FitObservableInfo, ...]:\n", + " if invert_obs_order:\n", + " return tuple(list(reco_ch_info.observable_infos)[::-1])\n", + " else:\n", + " return reco_ch_info.observable_infos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2eecac1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_fit_obs_binning_col_name(fit_obs_id: int, inverted_order: bool) -> str:\n", + " if inverted_order:\n", + " return f\"inverted_binning_in_fit_obs_{fit_obs_id}\"\n", + " else:\n", + " return f\"binning_in_fit_obs_{fit_obs_id}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c7bd63f", + "metadata": {}, + "outputs": [], + "source": [ + "def add_2d_binning_column(\n", + " column_name: str,\n", + " df: pd.DataFrame,\n", + " fit_setup: FitSetupInfoContainer,\n", + " invert_obs_order: bool,\n", + ") -> None:\n", + " assert column_name not in df.columns, column_name\n", + " \n", + " df.loc[:, column_name] = np.nan\n", + " \n", + " for fo_id in range(len(fit_setup.observable_infos)):\n", + " df.loc[:, get_fit_obs_binning_col_name(fit_obs_id=fo_id, inverted_order=invert_obs_order)] = np.nan\n", + " \n", + " n_reco_ch: int = len(fit_setup.reco_channels)\n", + " \n", + " for reco_ch_id, reco_ch_info in enumerate(fit_setup.reco_channels):\n", + " assert isinstance(reco_ch_info, RecoChannelInfo), type(reco_ch_info)\n", + " \n", + " reco_ch_mask = reco_ch_info.get_mask(df=df, reco_mode_column_str=fit_setup.reco_mode_col)\n", + " \n", + " fit_obs_infos = get_ordered_fit_observable_infos(\n", + " reco_ch_info=reco_ch_info,\n", + " invert_obs_order=invert_obs_order,\n", + " )\n", + " \n", + " assert len(fit_obs_infos) == 2, (len(fit_obs_infos) ,fit_obs_infos)\n", + " \n", + " n_bins_per_obs_in_reco_ch: List[int] = [fo.n_bins for fo in fit_obs_infos]\n", + " \n", + " for fit_obs_id, fit_obs_info in 
enumerate(fit_obs_infos):\n", + " digitize_result = np.digitize(\n", + " df[reco_ch_mask][fit_obs_info.col_name].values,\n", + " fit_obs_info.bin_edges,\n", + " )\n", + " \n", + " df.loc[reco_ch_mask, get_fit_obs_binning_col_name(fit_obs_id=fit_obs_id, inverted_order=invert_obs_order)] = digitize_result - 1\n", + " \n", + " obs_0_ids = df[reco_ch_mask][get_fit_obs_binning_col_name(fit_obs_id=0, inverted_order=invert_obs_order)].values\n", + " obs_1_ids = df[reco_ch_mask][get_fit_obs_binning_col_name(fit_obs_id=1, inverted_order=invert_obs_order)].values\n", + " \n", + " df.loc[reco_ch_mask, column_name] = obs_0_ids * n_bins_per_obs_in_reco_ch[1] + obs_1_ids\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c726d9b0", + "metadata": {}, + "outputs": [], + "source": [ + "test_df = copy.deepcopy(fit_status.fitter_instance._mc_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "006e4535", + "metadata": {}, + "outputs": [], + "source": [ + "add_2d_binning_column(\n", + " column_name=\"__test_full_binning_col__\",\n", + " df=test_df,\n", + " fit_setup=fit_status.fitter_instance.fit_setup_info,\n", + " invert_obs_order=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c6cfcca", + "metadata": {}, + "outputs": [], + "source": [ + "add_2d_binning_column(\n", + " column_name=\"__test_inverted_full_binning_col__\",\n", + " df=test_df,\n", + " fit_setup=fit_status.fitter_instance.fit_setup_info,\n", + " invert_obs_order=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9b04c40", + "metadata": {}, + "outputs": [], + "source": [ + "relevant_test_cols = [\"__test_full_binning_col__\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0855d4c", + "metadata": {}, + "outputs": [], + "source": [ + "test_groupby_res = bin_manager.get_groupby_for(df=test_df, cols=relevant_test_cols)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "50f37581", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4417f3cd", + "metadata": {}, + "outputs": [], + "source": [ + "np.unique(test_groupby_res.agg([lambda x: np.unique(x).size]).values.flatten())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bc78a3a", + "metadata": {}, + "outputs": [], + "source": [ + "for comp_index, fit_comp in enumerate(fit_status.fitter_instance.fit_setup_info.components):\n", + " print(f\"comp_index {comp_index}: {fit_comp.latex_label}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d86b0b71", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8b49ff3", + "metadata": {}, + "outputs": [], + "source": [ + "test2_groupby_res = bin_manager.get_groupby_for(df=test_df, cols=[fit_status.fitter_instance.fit_setup_info.weight_info.col_name])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41a5714c", + "metadata": {}, + "outputs": [], + "source": [ + "test2_groupby_res.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13283574", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98453ec0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8ef18d7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2c88647", + "metadata": {}, + "outputs": [], + "source": [ + "test2_weights = test2_groupby_res.sum().values.flatten()\n", + "\n", + "test2_bin_mids = np.arange(len(test2_weights))\n", + "test2_bin_edges = np.arange(len(test2_weights + 1)) -0.5\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + 
"\n", + "\n", + "_ = ax.hist(\n", + " x=test2_bin_mids,\n", + " bins=test2_bin_edges,\n", + " weights=test2_weights,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=\"Nominal\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe69d17b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db0c86a5", + "metadata": {}, + "outputs": [], + "source": [ + "test3_sys_col_manager = SysColVarManager(base_df=test_df[bin_manager.binning_df.COMP_BIN == 1], fit_setup=fit_status.fitter_instance.fit_setup_info)\n", + "test3_bin_manager = test3_sys_col_manager.get_bin_and_comp_mask_manager()\n", + "\n", + "test3_groupby_res = test3_bin_manager.get_groupby_for(df=test_df[bin_manager.binning_df.COMP_BIN == 1], cols=[fit_status.fitter_instance.fit_setup_info.weight_info.col_name])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be7283b9", + "metadata": {}, + "outputs": [], + "source": [ + "test3_weights = test3_groupby_res.sum().values.flatten()\n", + "\n", + "test3_bin_mids = np.arange(len(test3_weights))\n", + "test3_bin_edges = np.arange(len(test3_weights + 1)) -0.5\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "_ = ax.hist(\n", + " x=test3_bin_mids,\n", + " bins=test3_bin_edges,\n", + " weights=test3_weights,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=\"Nominal\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fe804d0", + "metadata": {}, + "outputs": [], + "source": [ + "test3_sys_col_manager = SysColVarManager(base_df=test_df[bin_manager.binning_df.COMP_BIN == 2], fit_setup=fit_status.fitter_instance.fit_setup_info)\n", + "test3_bin_manager = 
test3_sys_col_manager.get_bin_and_comp_mask_manager()\n", + "\n", + "test3_groupby_res = test3_bin_manager.get_groupby_for(df=test_df[bin_manager.binning_df.COMP_BIN == 2], cols=[fit_status.fitter_instance.fit_setup_info.weight_info.col_name])\n", + "\n", + "test3_weights = test3_groupby_res.sum().values.flatten()\n", + "\n", + "test3_bin_mids = np.arange(len(test3_weights))\n", + "test3_bin_edges = np.arange(len(test3_weights + 1)) -0.5\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "_ = ax.hist(\n", + " x=test3_bin_mids,\n", + " bins=test3_bin_edges,\n", + " weights=test3_weights,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=\"Nominal\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e0d8b91", + "metadata": {}, + "outputs": [], + "source": [ + "_test3_comp_id=3\n", + "\n", + "test3_sys_col_manager = SysColVarManager(base_df=test_df[bin_manager.binning_df.COMP_BIN == _test3_comp_id], fit_setup=fit_status.fitter_instance.fit_setup_info)\n", + "test3_bin_manager = test3_sys_col_manager.get_bin_and_comp_mask_manager()\n", + "\n", + "test3_groupby_res = test3_bin_manager.get_groupby_for(df=test_df[bin_manager.binning_df.COMP_BIN == _test3_comp_id], cols=[fit_status.fitter_instance.fit_setup_info.weight_info.col_name])\n", + "\n", + "test3_weights = test3_groupby_res.sum().values.flatten()\n", + "\n", + "test3_bin_mids = np.arange(len(test3_weights))\n", + "test3_bin_edges = np.arange(len(test3_weights + 1)) -0.5\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "_ = ax.hist(\n", + " x=test3_bin_mids,\n", + " bins=test3_bin_edges,\n", + " weights=test3_weights,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " 
label=\"Nominal\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2154627a", + "metadata": {}, + "outputs": [], + "source": [ + "_test3_comp_id=4\n", + "\n", + "test3_sys_col_manager = SysColVarManager(base_df=test_df[bin_manager.binning_df.COMP_BIN == _test3_comp_id], fit_setup=fit_status.fitter_instance.fit_setup_info)\n", + "test3_bin_manager = test3_sys_col_manager.get_bin_and_comp_mask_manager()\n", + "\n", + "test3_groupby_res = test3_bin_manager.get_groupby_for(df=test_df[bin_manager.binning_df.COMP_BIN == _test3_comp_id], cols=[fit_status.fitter_instance.fit_setup_info.weight_info.col_name])\n", + "\n", + "test3_weights = test3_groupby_res.sum().values.flatten()\n", + "\n", + "test3_bin_mids = np.arange(len(test3_weights))\n", + "test3_bin_edges = np.arange(len(test3_weights + 1)) -0.5\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "_ = ax.hist(\n", + " x=test3_bin_mids,\n", + " bins=test3_bin_edges,\n", + " weights=test3_weights,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=\"Nominal\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04398844", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2afd33c6", + "metadata": {}, + "outputs": [], + "source": [ + "[obs_info.col_name for obs_info in fit_status.fitter_instance.fit_setup_info.observable_infos]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "699c58a0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c739311b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1dca977", + "metadata": {}, + 
"outputs": [], + "source": [ + "binning_info: FitSetupBinningPerRecoChInfo = FitSetupBinningPerRecoChInfo.init_from(fit_setup=fit_status.fitter_instance.fit_setup_info)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b79e218", + "metadata": {}, + "outputs": [], + "source": [ + "binning_info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9816c711", + "metadata": {}, + "outputs": [], + "source": [ + "ff_sig_shape_sys_info: UpDownAndDiffSystematicsDetails = rdstar_sys.AddSysFFSignal.get_shape_sys_info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fed9468", + "metadata": {}, + "outputs": [], + "source": [ + "ff_sig_shape_sys_info.shape_effects.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cfcdb97", + "metadata": {}, + "outputs": [], + "source": [ + "ff_sig_shape_sys_info.shape_effects[:, 0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d6e8d59", + "metadata": {}, + "outputs": [], + "source": [ + "var_index = 1\n", + "\n", + "sys_from_file_var_data = ff_sig_shape_sys_info.shape_effects[:1000, var_index]\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "sys_from_file_bin_mids = np.arange(len(sys_from_file_var_data))\n", + "sys_from_file_bin_edges = np.arange(len(sys_from_file_var_data) + 1) - 0.5\n", + "\n", + "component_boarders = np.arange(np.round(len(sys_from_file_var_data) / 146) + 1) * 146\n", + "\n", + "ax.vlines(component_boarders, np.min(sys_from_file_var_data), max(sys_from_file_var_data), colors=\"black\", lw=0.6,linestyle=\"dashed\")\n", + "\n", + "_ = ax.hist(\n", + " x=sys_from_file_bin_mids,\n", + " bins=sys_from_file_bin_edges,\n", + " weights=sys_from_file_var_data,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=f\"Sys from file var {var_index}\",\n", + " 
histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b3c58b7", + "metadata": {}, + "outputs": [], + "source": [ + "var_index = 1\n", + "\n", + "sys_from_file_var_data = ff_sig_shape_sys_info.relative_shape_effects[:1000, var_index]\n", + "\n", + "fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + ")\n", + "\n", + "sys_from_file_bin_mids = np.arange(len(sys_from_file_var_data))\n", + "sys_from_file_bin_edges = np.arange(len(sys_from_file_var_data) + 1) - 0.5\n", + "\n", + "component_boarders = np.arange(np.round(len(sys_from_file_var_data) / 146) + 1) * 146\n", + "\n", + "ax.vlines(component_boarders, np.min(sys_from_file_var_data), max(sys_from_file_var_data), colors=\"black\", lw=0.6,linestyle=\"dashed\")\n", + "\n", + "_ = ax.hist(\n", + " x=sys_from_file_bin_mids,\n", + " bins=sys_from_file_bin_edges,\n", + " weights=sys_from_file_var_data,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=f\"Sys from file var {var_index}\",\n", + " histtype=\"stepfilled\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa06ad3f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a5e3eee", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2908e97", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05e8703e", + "metadata": {}, + "outputs": [], + "source": [ + "ff_sig_shape_sys_info.relative_shape_effects.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e1dc8e7", + "metadata": {}, + "outputs": [], + "source": [ + "shape_sys_infos_from_file = np.stack(np.split(ff_sig_shape_sys_info.relative_shape_effects, binning_info.n_bins_per_component, axis=0))" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "31ef1567", + "metadata": {}, + "outputs": [], + "source": [ + "shape_sys_infos_from_file.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "434fb6c7", + "metadata": {}, + "outputs": [], + "source": [ + "shape_sys_infos_from_file[:, 0, 0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72e4fcec", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c05f2c41", + "metadata": {}, + "outputs": [], + "source": [ + "n_components = fit_status.fitter_instance.fit_setup_info.n_components\n", + "\n", + "reco_ch_bins_per_reco_ch = fit_status.fitter_instance.fit_setup_info.n_bins_per_reco_ch\n", + "reco_ch_bin_boarders = np.cumsum(reco_ch_bins_per_reco_ch) - 0.5\n", + "reco_ch_bin_boarders, reco_ch_bins_per_reco_ch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd65a8fb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddb2139e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f2e0471", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_sys_var_distribution_for(\n", + " sys_shape_info: ShapePlotInfoContainer,\n", + " component_id: int,\n", + " var_id: int,\n", + "):\n", + " this_nominal_array = sys_shape_info.normed_base_shape[:, component_id]\n", + " this_uncert_array = sys_shape_info.relative_shape_error[:, component_id, var_id]\n", + "\n", + " assert this_nominal_array.shape == this_uncert_array.shape, (this_nominal_array.shape, this_uncert_array.shape)\n", + "\n", + " nominal_weight_vals = this_nominal_array\n", + " uncert_weight_vals = this_uncert_array\n", + "\n", + " n_bins = this_nominal_array.shape[0]\n", + "\n", + " bin_edges = np.arange(n_bins + 1) - 0.5\n", + " bin_mids = np.arange(n_bins)\n", + "\n", + 
" fig, ax = plt.subplots(\n", + " nrows=1,\n", + " ncols=1,\n", + " figsize=(9.0, 4.0),\n", + " dpi=200,\n", + " )\n", + "\n", + " _additional_nominal_scale_factor = 2.1\n", + " \n", + " scale_factor = np.max(np.abs(uncert_weight_vals)) / np.max(nominal_weight_vals) * _additional_nominal_scale_factor\n", + "\n", + " ax.vlines(reco_ch_bin_boarders, np.min(uncert_weight_vals), max(np.max(uncert_weight_vals) * _additional_nominal_scale_factor, np.max(nominal_weight_vals * scale_factor)), colors=\"black\", lw=0.6,linestyle=\"dashed\")\n", + "\n", + " _ = ax.hist(\n", + " x=bin_mids,\n", + " bins=bin_edges,\n", + " weights=nominal_weight_vals * scale_factor,\n", + " stacked=False,\n", + " color=\"lightgray\",\n", + " alpha=0.5,\n", + " lw=0.8,\n", + " label=r\"Nominal $/$\" + f\" {1.0 / scale_factor:.2f}\",\n", + " histtype=\"stepfilled\",\n", + " )\n", + "\n", + " _ = ax.hist(\n", + " x=bin_mids,\n", + " bins=bin_edges,\n", + " weights=uncert_weight_vals,\n", + " stacked=False,\n", + " color=\"orange\",\n", + " alpha=1.0,\n", + " lw=0.8,\n", + " label=\"Sys Var\",\n", + " histtype=\"step\",\n", + " )\n", + "\n", + " ax.set_title(sys_shape_info.latex_str + f\" (Var {var_id}) in Component {component_id + 1} of {n_components}: \" + fit_status.fitter_instance.fit_setup_info.components[component_id].latex_label)\n", + "\n", + " ax.legend(frameon=False, loc=1, ncol=1, fontsize=12)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f05b0b0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41d8892b", + "metadata": {}, + "outputs": [], + "source": [ + "sig_ff_sys_shape_info_from_file = ShapePlotInfoContainer(\n", + " name=RDStarFitter.full_systematics_container.add_sys__ff_signal.systematics_key,\n", + " latex_str=RDStarFitter.full_systematics_container.add_sys__ff_signal.latex_str,\n", + " normed_base_shape=fit_status.fitter_instance.template_shapes,\n", + " 
relative_shape_error=shape_sys_infos_from_file,\n", + " pure_bin_counts=fit_status.fitter_instance.squared_template_stat_error,\n", + " components_to_consider=(\"BpDztau\", \"BzDmtau\", \"BpDzStau\", \"BzDmStau\"),\n", + " scale_factor=None,\n", + " plot_subsets=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "419fbaa3", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info_from_file,\n", + " component_id=0,\n", + " var_id=0,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fd5e1a9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96ba97d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eecd5082", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9939bfb5", + "metadata": {}, + "outputs": [], + "source": [ + "sig_ff_sys_shape_info = ShapePlotInfoContainer(\n", + " name=RDStarFitter.full_systematics_container.add_sys__ff_signal.systematics_key,\n", + " latex_str=RDStarFitter.full_systematics_container.add_sys__ff_signal.latex_str,\n", + " normed_base_shape=fit_status.fitter_instance.template_shapes,\n", + " relative_shape_error=fit_status.fitter_instance.ff_sig_rel_shape_sys_uncert_matrix,\n", + " pure_bin_counts=fit_status.fitter_instance.squared_template_stat_error,\n", + " components_to_consider=(\"BpDztau\", \"BzDmtau\", \"BpDzStau\", \"BzDmStau\"),\n", + " scale_factor=None,\n", + " plot_subsets=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a6d7a3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b68719a4", + "metadata": {}, + "outputs": [], + "source": [ + "def get_shapes_of_distribution_for(\n", 
+ " sys_shape_info: ShapePlotInfoContainer,\n", + " component_id: int,\n", + " var_id: int,\n", + "):\n", + " this_nominal_array = sys_shape_info.normed_base_shape[:, component_id]\n", + " this_uncert_array = sys_shape_info.relative_shape_error[:, component_id, var_id]\n", + " this_nominal_array_shape = this_nominal_array.shape\n", + " this_uncert_array_shape = this_uncert_array.shape\n", + " \n", + " return (this_nominal_array_shape, this_nominal_array,\"\\n\", this_uncert_array_shape, this_uncert_array)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e97f8389", + "metadata": {}, + "outputs": [], + "source": [ + "get_shapes_of_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=0,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9953a1b2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "599dd806", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "becd514e", + "metadata": {}, + "outputs": [], + "source": [ + "sig_ff_sys_shape_info.normed_base_shape.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a55afec", + "metadata": {}, + "outputs": [], + "source": [ + "sig_ff_sys_shape_info.relative_shape_error.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e6fe6cd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2337592", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=0,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddfed25f", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " 
sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a0c7b27", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97d04076", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=3,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "324c5957", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=0,\n", + " var_id=4,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c64c7e04", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd33ef54", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "for comp_id in range(14):\n", + " plot_sys_var_distribution_for(\n", + " sys_shape_info=sig_ff_sys_shape_info,\n", + " component_id=comp_id,\n", + " var_id=0,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71d9cdcd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1fad2c5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2efc120d", + "metadata": {}, + "outputs": [], + "source": [ + "lid_sys_shape_info = ShapePlotInfoContainer(\n", + " name=RDStarFitter.full_systematics_container.add_sys__pid_lepton.systematics_key,\n", + " latex_str=RDStarFitter.full_systematics_container.add_sys__pid_lepton.latex_str,\n", + " 
normed_base_shape=fit_status.fitter_instance.template_shapes,\n", + " relative_shape_error=fit_status.fitter_instance.lepton_id_rel_shape_sys_uncert_matrix,\n", + " pure_bin_counts=fit_status.fitter_instance.squared_template_stat_error,\n", + " components_to_consider=None,\n", + " scale_factor=None,\n", + " plot_subsets=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1923f299", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=lid_sys_shape_info,\n", + " component_id=0,\n", + " var_id=0,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a13f645", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=lid_sys_shape_info,\n", + " component_id=0,\n", + " var_id=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e33b886", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=lid_sys_shape_info,\n", + " component_id=0,\n", + " var_id=2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9627abd2", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=lid_sys_shape_info,\n", + " component_id=0,\n", + " var_id=3,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d647d1e2", + "metadata": {}, + "outputs": [], + "source": [ + "plot_sys_var_distribution_for(\n", + " sys_shape_info=lid_sys_shape_info,\n", + " component_id=0,\n", + " var_id=4,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9331f33d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4701dbf", + "metadata": {}, + "outputs": [], "source": [] }, { @@ -565,14 +1851,11 @@ "execution_count": null, "id": "3fab180d", "metadata": { - 
"scrolled": false + "scrolled": true }, "outputs": [], "source": [ - "_this_lid_shape_eval_info = None\n", - "for _a_lid_shape_eval_info in lid_shape_eval_generator:\n", - " _this_lid_shape_eval_info = _a_lid_shape_eval_info\n", - " break" + "_this_lid_shape_eval_info = list(lid_shape_eval_generator)" ] }, { @@ -622,51 +1905,16 @@ "metadata": {}, "outputs": [], "source": [ - "_this_norm_ff_shape_eval_info = None\n", - "for _a_norm_ff_shape_eval_info in norm_ff_shape_eval_generator:\n", - " _this_norm_ff_shape_eval_info = _a_norm_ff_shape_eval_info\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f9e8a95", - "metadata": {}, - "outputs": [], - "source": [ - "_this_norm_ff_shape_eval_info.shape_error" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "60607294", - "metadata": {}, - "outputs": [], - "source": [ - "_this_norm_ff_shape_eval_info.normed_base_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9b8ef4c", - "metadata": {}, - "outputs": [], - "source": [ - "_this_norm_ff_shape_eval_info.stat_error" + "_this_norm_ff_shape_eval_info = list(norm_ff_shape_eval_generator)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e6db126c", + "id": "9851eff1", "metadata": {}, "outputs": [], - "source": [ - "_this_norm_ff_shape_eval_info.stat_error" - ] + "source": [] }, { "cell_type": "code", @@ -707,40 +1955,7 @@ "metadata": {}, "outputs": [], "source": [ - "_this_sig_ff_shape_eval_info = None\n", - "for _a_sig_ff_shape_eval_info in sig_ff_shape_eval_generator:\n", - " _this_sig_ff_shape_eval_info = _a_sig_ff_shape_eval_info\n", - " break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "183de20f", - "metadata": {}, - "outputs": [], - "source": [ - "_this_sig_ff_shape_eval_info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29ae25e7", - "metadata": {}, - "outputs": [], - "source": [ - 
"_this_sig_ff_shape_eval_info.normed_base_shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9cc8f0f", - "metadata": {}, - "outputs": [], - "source": [ - "_this_sig_ff_shape_eval_info.shape_error" + "_this_sig_ff_shape_eval_info = list(sig_ff_shape_eval_generator)\n" ] }, { @@ -779,13 +1994,12 @@ "cell_type": "code", "execution_count": null, "id": "854b1246", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "_this_tracking_shape_eval_info = None\n", - "for _a_tracking_shape_eval_info in tracking_shape_eval_generator:\n", - " _this_tracking_shape_eval_info = _a_tracking_shape_eval_info\n", - " break" + "_this_tracking_shape_eval_infos = list(tracking_shape_eval_generator)" ] }, { @@ -859,10 +2073,7 @@ "metadata": {}, "outputs": [], "source": [ - "_this_k_short_shape_eval_info = None\n", - "for _a_k_short_shape_eval_info in k_short_shape_eval_generator:\n", - " _this_k_short_shape_eval_info = _a_k_short_shape_eval_info\n", - " break" + "_this_k_short_shape_eval_info = list(k_short_shape_eval_generator)" ] }, { diff --git a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_covariance_evaluation.ipynb b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_covariance_evaluation.ipynb index b9c05fc0c..b99e9a282 100644 --- a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_covariance_evaluation.ipynb +++ b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_covariance_evaluation.ipynb @@ -588,7 +588,7 @@ "metadata": {}, "outputs": [], "source": [ - "np.min(np.abstracking_shape_sys_eval.relative_shape_effects), np.max(np.abs(tracking_shape_sys_eval.relative_shape_effects))" + "np.min(np.abs(tracking_shape_sys_eval.relative_shape_effects)), np.max(np.abs(tracking_shape_sys_eval.relative_shape_effects))" ] }, { diff --git 
a/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_shape_effect_studies.ipynb b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_shape_effect_studies.ipynb new file mode 100644 index 000000000..fa0e4c1ae --- /dev/null +++ b/rdstar/offline_analysis/fitting/dedicated_fit_approach/notebooks/systematics/sys_shape_effect_studies.ipynb @@ -0,0 +1,1299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "38fccf33", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d90ef4ad", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import copy\n", + "import scipy\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "from dataclasses import dataclass\n", + "from collections.abc import Mapping\n", + "from typing import Tuple, List, Dict, Sequence, Optional, Callable, Union, Iterable, ClassVar, Generator, Iterator\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from IPython.core.display import display, HTML\n", + "\n", + "pd.set_option('display.max_columns', 999)\n", + "\n", + "display(HTML(\"<style>.container { width:90% !important; }</style>\"))\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49635bf2", + "metadata": {}, + "outputs": [], + "source": [ + "plt.switch_backend(\"module://ipykernel.pylab.backend_inline\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86905141", + "metadata": {}, + "outputs": [], + "source": [ + "from templatefitter.plotter.plot_utilities import export, AxesType, FigureType\n", + "from templatefitter.plotter.plot_style import KITColors, TangoColors, set_matplotlibrc_params, xlabel_pos, ylabel_pos\n", + "\n", + "from rdstar.utilities import PathType\n", + "from rdstar.utilities.logging import 
log_to_default\n", + "\n", + "from rdstar.offline_analysis.observables.common_observables import Cols\n", + "from rdstar.offline_analysis.rdstar_samples import alternative_sample_keys, main_sample_keys\n", + "\n", + "from rdstar.offline_analysis.reco_modes.reco_mode_collections import BSigRecoModes\n", + "from rdstar.offline_analysis.observables.sig_id_observables import SigIDObservables\n", + "from rdstar.offline_analysis.mc_matching.mc_matching_id_collections import MCMatchingCollections\n", + "\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.fit_setups import FitSetupCollection\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.dedicated_fit_routine import RDStarFitDataManager\n", + "\n", + "from rdstar.offline_analysis.systematics import SysColsInfo, full_systematics_dict, multiplicative_eff_sys_info_names\n", + "\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.systematics_manager.shape_sys_evaluater import (\n", + " sys_info_cache_dir_path,\n", + " SysCovEvaluationManager,\n", + " SysDiffEvaluationManager,\n", + " plot_heatmap,\n", + ")\n", + "\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.fit_info_container import (\n", + " ComponentInfo,\n", + " FitSetupInfoContainer,\n", + " FitObservableInfo,\n", + " RecoChannelInfo,\n", + ")\n", + "\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.dedicated_sys_handler import SystematicsInfo\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.fit_result_container import RDStarFitResultContainer\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.fit_setups.fit_setup_utilities import FitSetupTriplet\n", + "from rdstar.offline_analysis.fitting.dedicated_fit_approach.systematics_manager.shape_sys_evaluater import FitSetupBinningPerRecoChInfo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "635fd0d7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "id": "a352c7da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc25057b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "367fc191", + "metadata": {}, + "source": [ + "# General Settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ebce82d", + "metadata": {}, + "outputs": [], + "source": [ + "rdstar_fit_setup_triplet = FitSetupCollection.rdstar_fit_setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c97d041b", + "metadata": {}, + "outputs": [], + "source": [ + "this_fit_setup = rdstar_fit_setup_triplet.asimov" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1eac9dec", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f354170", + "metadata": {}, + "outputs": [], + "source": [ + "selection_name = \"main_with_mva_selection_sec_sel_mva\"\n", + "\n", + "base_input_path = os.path.join(\n", + " \"/ceph/fmetzner/rdstar/data_set_prod_24thMarch2022_stream0/CombinedSamples/\",\n", + " selection_name,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76f782d1", + "metadata": {}, + "outputs": [], + "source": [ + "fit_mc_cache_dir_path = \"/ceph/fmetzner/rdstar/data_set_prod_24thMarch2022_stream0/FitMCDataCache\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e6161b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92e1cf3e", + "metadata": {}, + "outputs": [], + "source": [ + "weight_col = this_fit_setup.weight_info.col_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b294b74", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7996c53c", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdb66d4a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "08c619b3", + "metadata": {}, + "source": [ + "# Loading Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a44b42c5", + "metadata": {}, + "outputs": [], + "source": [ + "rdstar_data_manager = RDStarFitDataManager(\n", + " fit_setup=rdstar_fit_setup_triplet,\n", + " base_input_path=base_input_path,\n", + " mc_cache_version_tag=\"V3_March2024\",\n", + " mc_cache_dir_path=fit_mc_cache_dir_path,\n", + " use_alternative_gap_setup=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2f50216", + "metadata": {}, + "outputs": [], + "source": [ + "full_df = rdstar_data_manager.mc_sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0498e31", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bba0d73b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "68f87129", + "metadata": {}, + "source": [ + "# Plotting Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d96c6294", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20c237a9", + "metadata": {}, + "outputs": [], + "source": [ + "def get_ordered_fit_observable_infos(\n", + " reco_ch_info: RecoChannelInfo,\n", + " invert_obs_order: bool,\n", + ") -> Tuple[FitObservableInfo, ...]:\n", + " if invert_obs_order:\n", + " return tuple(list(reco_ch_info.observable_infos)[::-1])\n", + " else:\n", + " return reco_ch_info.observable_infos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d430f4db", + "metadata": {}, + "outputs": [], + "source": [ + "def get_fit_obs_binning_col_name(fit_obs_id: int, inverted_order: 
bool) -> str:\n", + " if inverted_order:\n", + " return f\"inverted_binning_in_fit_obs_{fit_obs_id}\"\n", + " else:\n", + " return f\"binning_in_fit_obs_{fit_obs_id}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f71acc1", + "metadata": {}, + "outputs": [], + "source": [ + "def add_2d_binning_column(\n", + " column_name: str,\n", + " df: pd.DataFrame,\n", + " fit_setup: FitSetupInfoContainer,\n", + " invert_obs_order: bool,\n", + ") -> None:\n", + " assert column_name not in df.columns, column_name\n", + " \n", + " df.loc[:, column_name] = np.nan\n", + " \n", + " for fo_id in range(len(fit_setup.observable_infos)):\n", + " df.loc[:, get_fit_obs_binning_col_name(fit_obs_id=fo_id, inverted_order=invert_obs_order)] = np.nan\n", + " \n", + " n_reco_ch: int = len(fit_setup.reco_channels)\n", + " \n", + " for reco_ch_id, reco_ch_info in enumerate(fit_setup.reco_channels):\n", + " assert isinstance(reco_ch_info, RecoChannelInfo), type(reco_ch_info)\n", + " \n", + " reco_ch_mask = reco_ch_info.get_mask(df=df, reco_mode_column_str=fit_setup.reco_mode_col)\n", + " \n", + " fit_obs_infos = get_ordered_fit_observable_infos(\n", + " reco_ch_info=reco_ch_info,\n", + " invert_obs_order=invert_obs_order,\n", + " )\n", + " \n", + " assert len(fit_obs_infos) == 2, (len(fit_obs_infos) ,fit_obs_infos)\n", + " \n", + " n_bins_per_obs_in_reco_ch: List[int] = [fo.n_bins for fo in fit_obs_infos]\n", + " \n", + " for fit_obs_id, fit_obs_info in enumerate(fit_obs_infos):\n", + " digitize_result = np.digitize(\n", + " df[reco_ch_mask][fit_obs_info.col_name].values,\n", + " fit_obs_info.bin_edges,\n", + " )\n", + " \n", + " df.loc[reco_ch_mask, get_fit_obs_binning_col_name(fit_obs_id=fit_obs_id, inverted_order=invert_obs_order)] = digitize_result - 1\n", + " \n", + " obs_0_ids = df[reco_ch_mask][get_fit_obs_binning_col_name(fit_obs_id=0, inverted_order=invert_obs_order)].values\n", + " obs_1_ids = df[reco_ch_mask][get_fit_obs_binning_col_name(fit_obs_id=1, 
inverted_order=invert_obs_order)].values\n", + " \n", + " df.loc[reco_ch_mask, column_name] = obs_0_ids * n_bins_per_obs_in_reco_ch[1] + obs_1_ids\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b60e0cc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0f9e2e9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d33a72e4", + "metadata": {}, + "outputs": [], + "source": [ + "# del full_df[\"__hist_obs_binning_id__\"]\n", + "add_2d_binning_column(\n", + " column_name=\"__hist_obs_binning_id__\",\n", + " df=full_df,\n", + " fit_setup=this_fit_setup,\n", + " invert_obs_order=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9e2469f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4bd06c8", + "metadata": {}, + "outputs": [], + "source": [ + "# del full_df[\"__inverted_hist_obs_binning_id__\"]\n", + "add_2d_binning_column(\n", + " column_name=\"__inverted_hist_obs_binning_id__\",\n", + " df=full_df,\n", + " fit_setup=this_fit_setup,\n", + " invert_obs_order=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9428a1e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ca5eab4", + "metadata": {}, + "outputs": [], + "source": [ + "full_df[[\"__hist_obs_binning_id__\", \"__inverted_hist_obs_binning_id__\", this_fit_setup.reco_mode_col]].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a2b187a", + "metadata": {}, + "outputs": [], + "source": [ + "full_df[[\"__hist_obs_binning_id__\", \"__inverted_hist_obs_binning_id__\", this_fit_setup.reco_mode_col]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51227e7e", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f243cb2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3592e161", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_sys_effect(\n", + " df: pd.DataFrame,\n", + " column_name: str,\n", + " fit_setup: FitSetupInfoContainer,\n", + ") -> None:\n", + " \n", + " assert column_name in df.columns, column_name\n", + " \n", + " set_matplotlibrc_params()\n", + " \n", + " main_weight_col: str = weight_col\n", + " \n", + "# base_correction_col: str = \"HAMMER_weight_Norm\"\n", + "# up_var_col: str = \"HAMMER_weight_Norm_up0\"\n", + "# dw_var_col: str = \"HAMMER_weight_Norm_down0\"\n", + " \n", + " base_correction_col: str = \"HAMMER_weight_Signal\"\n", + " up_var_col: str = \"HAMMER_weight_Signal_up0\"\n", + " dw_var_col: str = \"HAMMER_weight_Signal_down0\"\n", + " \n", + " new_up_weight_col: str = \"__tmp_full_weight_with_HFF_var0_up_var__\"\n", + " new_dw_weight_col: str = \"__tmp_full_weight_with_HFF_var0_dw_var__\"\n", + " \n", + " df.loc[:, new_up_weight_col] = df[main_weight_col].values / df[base_correction_col].values * df[up_var_col].values\n", + " df.loc[:, new_dw_weight_col] = df[main_weight_col].values / df[base_correction_col].values * df[dw_var_col].values\n", + " \n", + " for reco_ch_id, reco_ch_info in enumerate(fit_setup.reco_channels):\n", + " \n", + " reco_ch_mask = reco_ch_info.get_mask(df=df, reco_mode_column_str=fit_setup.reco_mode_col)\n", + " \n", + " n_bins: int = reco_ch_info.n_bins_total\n", + " hist_range: Tuple[float, float] = (-0.5, n_bins - 0.5)\n", + " \n", + " for component_info in fit_setup.components:\n", + " \n", + " component_mask = component_info.get_component_mask(\n", + " df=df,\n", + " component_id_column_name=fit_setup.component_id_column_name,\n", + " )\n", + " \n", + " this_mask = reco_ch_mask & component_mask\n", + " \n", + " fig, (ax1, ax2) = 
plt.subplots(\n", + " nrows=2,\n", + " ncols=1,\n", + " figsize=(9.0, 6.0),\n", + " dpi=300,\n", + " sharex=\"all\",\n", + " gridspec_kw={\"height_ratios\": [3.0, 2.0]},\n", + " )\n", + "\n", + " bin_counts_nom, list_of_bin_edges_nom = np.histogramdd(\n", + " df[this_mask][column_name].values,\n", + " bins=n_bins,\n", + " range=(hist_range,),\n", + " density=False,\n", + " weights=df[this_mask][main_weight_col].values,\n", + " )\n", + "\n", + " bin_counts_up, list_of_bin_edges_up = np.histogramdd(\n", + " df[this_mask][column_name].values,\n", + " bins=n_bins,\n", + " range=(hist_range,),\n", + " density=False,\n", + " weights=df[this_mask][new_up_weight_col].values,\n", + " )\n", + "\n", + " bin_counts_dw, list_of_bin_edges_dw = np.histogramdd(\n", + " df[this_mask][column_name].values,\n", + " bins=n_bins,\n", + " range=(hist_range,),\n", + " density=False,\n", + " weights=df[this_mask][new_dw_weight_col].values,\n", + " )\n", + "\n", + " bin_edges_nom = list_of_bin_edges_nom[0]\n", + " bin_mids: np.ndarray = (bin_edges_nom[1:] + bin_edges_nom[:-1]) / 2.0\n", + "\n", + " nominal_data = bin_counts_nom\n", + " up_data = bin_counts_up\n", + " down_data = bin_counts_dw\n", + "\n", + " hist_plot_infos: List[Tuple[str, np.ndarray, str]] = [\n", + " (KITColors.kit_black, nominal_data, \"Nominal\"),\n", + " (KITColors.kit_blue, up_data, \"Up\"),\n", + " (KITColors.kit_green, down_data, \"Down\"),\n", + " ]\n", + "\n", + " for h_color, h_weights, h_label in hist_plot_infos:\n", + " \n", + " if np.any(np.isnan(h_weights)):\n", + " raise ValueError(f\"Found {np.sum(np.isnan(h_weights))} nan weights for {h_label} in main plot\")\n", + " \n", + " ax1.hist(\n", + " x=bin_mids,\n", + " bins=bin_edges_nom,\n", + " weights=h_weights,\n", + " stacked=False,\n", + " color=h_color,\n", + " lw=0.8,\n", + " label=h_label,\n", + " histtype=\"step\",\n", + " )\n", + " \n", + " for h_color, h_weights, h_label in hist_plot_infos:\n", + " \n", + " norm_from_nominal_data = 
np.where(nominal_data == 0.0, np.ones_like(nominal_data), nominal_data)\n", + " current_weights = np.where(h_weights == 0.0, h_weights, (h_weights / norm_from_nominal_data) - 1.0)\n", + " \n", + " if np.any(np.isnan(current_weights)):\n", + " raise ValueError(f\"Found {np.sum(np.isnan(current_weights))} nan weights for {h_label} in pull plot\")\n", + " \n", + " ax2.hist(\n", + " x=bin_mids,\n", + " bins=bin_edges_nom,\n", + " weights=current_weights,\n", + " stacked=False,\n", + " color=h_color,\n", + " lw=0.8,\n", + " label=h_label,\n", + " histtype=\"step\",\n", + " )\n", + "\n", + " ax1.legend(frameon=False, loc=1, ncol=1, fontsize=12)\n", + "\n", + " ax1.set_title(component_info.latex_label + \" in \" + reco_ch_info.latex_label, fontsize=14)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10d98fed", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a426e1c0", + "metadata": {}, + "outputs": [], + "source": [ + "test_arr = np.arange(10) * 1.0\n", + "test_arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2afdf294", + "metadata": {}, + "outputs": [], + "source": [ + "norm_arr = np.abs(np.arange(10) - 1) * 1.0\n", + "norm_arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ac9e611", + "metadata": {}, + "outputs": [], + "source": [ + "not_zero_mask: np.ndarray = np.not_equal(norm_arr, 0.0)\n", + "not_zero_mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "913a5516", + "metadata": {}, + "outputs": [], + "source": [ + "_rel_res_vals: np.ndarray = np.zeros_like(test_arr)\n", + "_rel_res_vals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87b7a105", + "metadata": {}, + "outputs": [], + "source": [ + "np.place(_rel_res_vals, not_zero_mask, test_arr[not_zero_mask] / norm_arr[not_zero_mask])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "93e9591b", + "metadata": {}, + "outputs": [], + "source": [ + "_rel_res_vals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "333679ef", + "metadata": {}, + "outputs": [], + "source": [ + "test_arr / norm_arr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3391b96a", + "metadata": {}, + "outputs": [], + "source": [ + "new_res = test_arr / norm_arr\n", + "new_res[norm_arr==0.0] = 0\n", + "new_res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c277d9a2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f75b8509", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae939ba7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c99d11e8", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "plot_sys_effect(\n", + " df=full_df,\n", + " column_name=\"__hist_obs_binning_id__\",\n", + " fit_setup=this_fit_setup,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7328f5f9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30854d0d", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "plot_sys_effect(\n", + " df=full_df,\n", + " column_name=\"__inverted_hist_obs_binning_id__\",\n", + " fit_setup=this_fit_setup,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cc3a5d1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d770a70", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127158fd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 
@dataclass(frozen=True)
class ShapePlotInfoContainer:
    """Immutable bundle of the arrays describing one systematic shape effect.

    The constructor checks that ``normed_base_shape`` and ``pure_bin_counts``
    are two-dimensional arrays of identical shape and that
    ``relative_shape_error`` is three-dimensional with the same two leading
    dimensions. The third axis of ``relative_shape_error`` enumerates the
    eigendirections of the systematic.

    ``name``, ``latex_str``, ``scale_factor``, ``components_to_consider`` and
    ``plot_subsets`` are stored as given and not interpreted by this class.
    """

    name: str
    latex_str: str
    normed_base_shape: np.ndarray
    relative_shape_error: np.ndarray
    pure_bin_counts: np.ndarray
    scale_factor: Optional[float] = None
    components_to_consider: Optional[Tuple[str, ...]] = None
    plot_subsets: bool = True

    def __post_init__(self) -> None:
        """Validate the dimensionality and mutual consistency of the arrays."""
        base_dims = self.normed_base_shape.shape
        assert len(base_dims) == 2, (len(base_dims), base_dims)

        error_dims = self.relative_shape_error.shape
        assert len(error_dims) == 3, (len(error_dims), error_dims)
        assert base_dims == error_dims[:2], (base_dims, error_dims)

        count_dims = self.pure_bin_counts.shape
        assert base_dims == count_dims, (base_dims, count_dims)

    @property
    def relative_stat_error(self) -> np.ndarray:
        """Return ``sqrt(counts) / counts`` element-wise.

        Entries whose count is not positive are divided by one instead, which
        avoids a division by zero.
        """
        counts = self.pure_bin_counts
        has_entries = counts > 0
        denominator: np.ndarray = np.where(has_entries, counts, np.ones_like(counts))
        return np.sqrt(counts) / denominator

    @property
    def number_of_eigendirections(self) -> int:
        """Return the length of the third axis of ``relative_shape_error``."""
        error_dims = self.relative_shape_error.shape
        return error_dims[2]


@dataclass(frozen=True)
class SpecificShapePlotInfoEntry:
    """Immutable record of what is drawn for one component and one subset.

    ``normed_base_shape``, ``shape_error`` and ``stat_error`` have to be
    one-dimensional arrays of identical shape; this is checked on
    construction. All remaining fields are stored without validation.
    """

    name: str
    latex_str: str
    subset_index: int
    normed_base_shape: np.ndarray
    shape_error: np.ndarray
    stat_error: np.ndarray
    reco_ch_info: RecoChannelInfo
    component_info: ComponentInfo
    scale_factor: Optional[float] = None

    def __post_init__(self) -> None:
        """Check that the three arrays are one-dimensional and equally shaped."""
        base_dims = self.normed_base_shape.shape
        assert len(base_dims) == 1, (len(base_dims), base_dims)

        shape_error_dims = self.shape_error.shape
        assert base_dims == shape_error_dims, (base_dims, shape_error_dims)

        stat_error_dims = self.stat_error.shape
        assert base_dims == stat_error_dims, (base_dims, stat_error_dims)


class SpecificShapePlotInfoContainer(Mapping):
    """Read-only mapping from entry name to ``SpecificShapePlotInfoEntry``.

    Iteration follows the order of the entries handed to the constructor. If
    two entries share a name, the later one replaces the earlier one.
    """

    def __init__(
        self,
        shape_plot_infos: Tuple[SpecificShapePlotInfoEntry, ...],
    ) -> None:
        by_name: Dict[str, SpecificShapePlotInfoEntry] = {}
        for entry in shape_plot_infos:
            by_name[entry.name] = entry
        self._entries: Dict[str, SpecificShapePlotInfoEntry] = by_name

    def __getitem__(self, item: str) -> SpecificShapePlotInfoEntry:
        """Return the entry stored under ``item``; raises ``KeyError`` if absent."""
        entries = self._entries
        return entries[item]

    def __iter__(self) -> Iterator[str]:
        """Iterate over the entry names."""
        return iter(self._entries.keys())

    def __len__(self) -> int:
        """Return the number of stored entries."""
        entries = self._entries
        return len(entries)
class SystematicsShapePlotter:
    """Create overview figures of systematic shape variations per fit component.

    For every reco channel of the Asimov fit setup and every eigendirection of
    a systematic, one figure is produced. Each fit component is drawn on its
    own axis of a grid with four columns; the position of a component in the
    grid is taken from a fixed component-name to (row, column) map.
    """

    output_dir_name: ClassVar[str] = "SystematicsShape"
    plot_name_prefix: ClassVar[str] = "sys_shape_effect_for"

    def __init__(
        self,
        base_output_dir_path: PathType,
        fit_setup: FitSetupTriplet,
        fig_size: Optional[Tuple[float, float]] = None,
        height_ratio: Tuple[float, float] = (3.5, 1.0),
    ) -> None:
        """Set up the plotter.

        Args:
            base_output_dir_path: Existing directory; plots are written to the
                sub-directory ``output_dir_name`` below it.
            fit_setup: Fit setup triplet; only its ``asimov`` member is used.
            fig_size: Figure size handed to matplotlib. If ``None``, the size
                is derived from the number of plot rows.
            height_ratio: Stored on the instance; not used by the methods of
                this class.

        Raises:
            AssertionError: If ``base_output_dir_path`` is not a directory.
        """
        assert os.path.isdir(base_output_dir_path), base_output_dir_path

        self.output_dir_path: PathType = os.path.join(base_output_dir_path, self.output_dir_name)
        self.fig_size: Optional[Tuple[float, float]] = fig_size
        self.height_ratio: Tuple[float, float] = height_ratio

        self._asimov_fit_setup: FitSetupInfoContainer = fit_setup.asimov
        self._fit_binning_info: FitSetupBinningPerRecoChInfo = FitSetupBinningPerRecoChInfo.init_from(
            fit_setup=self._asimov_fit_setup,
        )

        # Component name -> (row, column) of the axis the component is drawn on.
        # The "_in_cB" / "_in_nB" variants of a component share one axis, as
        # only one of them is plotted for a given reco channel.
        self._comp_to_axis_map: Dict[str, Tuple[int, int]] = {
            "BpDztau": (0, 3),
            "BzDmtau": (0, 2),
            "BpDzStau": (0, 1),
            "BzDmStau": (0, 0),
            "BpDzl": (1, 3),
            "BzDml": (1, 2),
            "BpDzSl": (1, 1),
            "BzDmSl": (1, 0),
            "DSS_in_cB": (2, 0),
            "DSS_in_nB": (2, 0),
            "BBbarBKG_in_cB": (2, 1),
            "BBbarBKG_in_nB": (2, 1),
            "CBKG_in_cB": (2, 2),
            "CBKG_in_nB": (2, 2),
        }

        self._charged_suffix: str = "_in_cB"
        self._neutral_suffix: str = "_in_nB"

        self._reco_ch_name_to_charge_suffix: Dict[str, str] = {
            "Bz_to_Dm": self._neutral_suffix,
            "Bm_to_Dz": self._charged_suffix,
            "Bz_to_Dsm": self._neutral_suffix,
            "Bm_to_Dsz": self._charged_suffix,
        }

    def get_comp_to_axis_map(
        self,
        components_to_consider: Optional[Tuple[str, ...]],
    ) -> Dict[str, Tuple[int, int]]:
        """Return the component-name to (row, column) axis map.

        Without a component selection a copy of the full map is returned. With
        a selection, rows that hold none of the selected components are
        removed and the rows below them move up accordingly; columns are left
        unchanged. The returned map still contains all component names, but
        only the positions of selected components are meaningful.
        """
        if components_to_consider is None:
            return dict(self._comp_to_axis_map)

        relevant_rows = {
            row for name, (row, _) in self._comp_to_axis_map.items() if name in components_to_consider
        }
        # The new row index is the number of kept rows above the original row.
        return {
            name: (sum(1 for r in range(row) if r in relevant_rows), col)
            for name, (row, col) in self._comp_to_axis_map.items()
        }

    @staticmethod
    def _get_number_of_plot_rows(
        comp_to_axis_map: Dict[str, Tuple[int, int]],
        components_to_consider: Optional[Tuple[str, ...]],
    ) -> int:
        """Return the number of grid rows needed for the considered components.

        The count is derived from the (already compacted) axis map, so it
        always agrees with the row indices used for drawing. At least one row
        is returned, also when no considered component is part of the map.
        """
        if components_to_consider is None:
            rows = [row for row, _ in comp_to_axis_map.values()]
        else:
            rows = [row for name, (row, _) in comp_to_axis_map.items() if name in components_to_consider]
        return max(rows, default=0) + 1

    @property
    def fit_components(self) -> Tuple[ComponentInfo, ...]:
        """Components of the Asimov fit setup."""
        return self._asimov_fit_setup.components

    @property
    def fit_reco_channels(self) -> Tuple[RecoChannelInfo, ...]:
        """Reco channels of the Asimov fit setup."""
        return self._asimov_fit_setup.reco_channels

    @property
    def fit_observable_infos(self) -> Tuple[FitObservableInfo, ...]:
        """Observable infos of the Asimov fit setup."""
        return self._asimov_fit_setup.observable_infos

    def _iter_reco_channel_subsets(
        self,
        sys_shape_info: ShapePlotInfoContainer,
    ) -> Generator[Tuple[RecoChannelInfo, ShapePlotInfoContainer], None, None]:
        """Yield one (reco channel, per-channel shape info) pair per reco channel.

        All arrays of ``sys_shape_info`` are split along axis 0 at the split
        indices provided by the fit binning info.
        """
        # Local import: the enclosing notebook is not known to expose `replace`.
        from dataclasses import replace

        split_ids = self._fit_binning_info.reco_ch_split_ids
        normed_shape_splits: List[np.ndarray] = np.split(sys_shape_info.normed_base_shape, split_ids, axis=0)
        shape_error_splits: List[np.ndarray] = np.split(sys_shape_info.relative_shape_error, split_ids, axis=0)
        # The raw counts are split here, NOT the relative statistical error:
        # the per-channel container derives its relative statistical error from
        # `pure_bin_counts` itself.
        bin_count_splits: List[np.ndarray] = np.split(sys_shape_info.pure_bin_counts, split_ids, axis=0)

        for reco_ch_id, reco_channel_info in enumerate(self.fit_reco_channels):
            # `replace` copies all remaining fields (latex string, scale factor,
            # component selection, ...) unchanged and re-runs the validation.
            yield reco_channel_info, replace(
                sys_shape_info,
                name=f"{sys_shape_info.name}_for_reco_ch_{reco_channel_info.name}",
                normed_base_shape=normed_shape_splits[reco_ch_id],
                relative_shape_error=shape_error_splits[reco_ch_id],
                pure_bin_counts=bin_count_splits[reco_ch_id],
            )

    def create_systematics_shape_plots(
        self,
        sys_shape_info: ShapePlotInfoContainer,
    ) -> Generator[SpecificShapePlotInfoContainer, None, None]:
        """Create the overview plots of one systematic for all reco channels.

        The plots are written to ``<output_dir_path>/<sys_shape_info.name>``,
        which is created if necessary. This is a generator: nothing is created
        before it is iterated. One container is yielded per reco channel and
        eigendirection.
        """
        target_dir_path: PathType = os.path.join(self.output_dir_path, sys_shape_info.name)
        os.makedirs(target_dir_path, exist_ok=True)

        for reco_channel_info, subset_sys_shape_info in self._iter_reco_channel_subsets(sys_shape_info):
            yield from self.create_systematics_shape_overview_plot_for(
                sys_shape_info=subset_sys_shape_info,
                reco_ch_info=reco_channel_info,
                target_dir_path=target_dir_path,
            )

    def create_systematics_shape_overview_plot_for(
        self,
        sys_shape_info: ShapePlotInfoContainer,
        reco_ch_info: RecoChannelInfo,
        target_dir_path: PathType,
    ) -> Generator[SpecificShapePlotInfoContainer, None, None]:
        """Yield one overview plot result per eigendirection of ``sys_shape_info``."""
        n_eigendirs: int = sys_shape_info.number_of_eigendirections
        for subset_id in range(n_eigendirs):
            yield self.plot_systematics_shape_overview_plot_for(
                subset_id=subset_id,
                number_of_subsets=n_eigendirs,
                sys_shape_info=sys_shape_info,
                reco_ch_info=reco_ch_info,
                target_dir_path=target_dir_path,
            )

    def _skip_this_component(
        self,
        reco_ch_info: RecoChannelInfo,
        component_info: ComponentInfo,
        components_to_consider: Optional[Tuple[str, ...]],
    ) -> bool:
        """Return ``True`` if the component must not be drawn for this reco channel.

        A component is skipped if it is not part of an explicit selection, or
        if its name carries a charge suffix that differs from the suffix
        expected for the reco channel. Components without charge suffix are
        never skipped on account of the reco channel.
        """
        if components_to_consider is not None and component_info.name not in components_to_consider:
            return True

        ch_suffixes: Tuple[str, ...] = tuple(set(self._reco_ch_name_to_charge_suffix.values()))
        if not component_info.name.endswith(ch_suffixes):
            return False

        expected_charge_suffix: str = self._reco_ch_name_to_charge_suffix[reco_ch_info.name]
        return not component_info.name.endswith(expected_charge_suffix)

    def plot_systematics_shape_overview_plot_for(
        self,
        subset_id: int,
        number_of_subsets: int,
        sys_shape_info: ShapePlotInfoContainer,
        reco_ch_info: RecoChannelInfo,
        target_dir_path: PathType,
    ) -> SpecificShapePlotInfoContainer:
        """Draw and export the overview figure of one eigendirection.

        Args:
            subset_id: Index along the third axis of the relative shape error.
            number_of_subsets: Total number of eigendirections; the index is
                only added to the figure title if there is more than one.
            sys_shape_info: Shape information of a single reco channel.
            reco_ch_info: Reco channel the shape information belongs to.
            target_dir_path: Directory the figure is exported to.

        Returns:
            Container with one entry per component that was drawn.
        """
        plot_file_name: str = f"{sys_shape_info.name}_{subset_id}"
        _shape_plot_infos: List[SpecificShapePlotInfoEntry] = []

        comp_to_axis_map: Dict[str, Tuple[int, int]] = self.get_comp_to_axis_map(
            components_to_consider=sys_shape_info.components_to_consider,
        )
        # The number of rows has to follow from the compacted axis map. Deriving
        # it from the number of selected components can yield too few rows, so
        # that components of different rows end up on the same axis.
        n_plot_rows: int = self._get_number_of_plot_rows(
            comp_to_axis_map=comp_to_axis_map,
            components_to_consider=sys_shape_info.components_to_consider,
        )

        if self.fig_size is None:
            fig_size: Tuple[float, float] = (12.0, n_plot_rows * 3.0)
        else:
            fig_size = self.fig_size

        fig, axes = plt.subplots(
            nrows=n_plot_rows,
            ncols=4,
            figsize=fig_size,
            dpi=300,
            sharex="all",
            sharey="all",
        )  # type: FigureType, np.ndarray

        assert isinstance(axes, np.ndarray), type(axes)
        assert all(isinstance(_ax, AxesType) for _ax in axes.flatten()), [type(a) for a in axes.flatten()]
        # matplotlib squeezes the axes array to one dimension for a single row.
        if n_plot_rows > 1:
            assert len(axes.shape) == 2, (len(axes.shape), axes.shape)
        else:
            assert len(axes.shape) == 1, (len(axes.shape), axes.shape)

        for comp_id, component in enumerate(self.fit_components):
            if self._skip_this_component(
                reco_ch_info=reco_ch_info,
                component_info=component,
                components_to_consider=sys_shape_info.components_to_consider,
            ):
                continue

            normed_base_shape: np.ndarray = sys_shape_info.normed_base_shape[:, comp_id]
            shape_error: np.ndarray = sys_shape_info.relative_shape_error[:, comp_id, subset_id]
            stat_error: np.ndarray = sys_shape_info.relative_stat_error[:, comp_id]

            assert len(normed_base_shape.shape) == 1, normed_base_shape.shape
            assert len(shape_error.shape) == 1, shape_error.shape
            assert len(stat_error.shape) == 1, stat_error.shape
            assert normed_base_shape.shape == shape_error.shape, (normed_base_shape.shape, shape_error.shape)
            assert normed_base_shape.shape == stat_error.shape, (normed_base_shape.shape, stat_error.shape)

            _shape_plot_info = SpecificShapePlotInfoEntry(
                name=f"{sys_shape_info.name}_{subset_id}_{component.name}",
                latex_str=sys_shape_info.latex_str + f" ({subset_id})",
                subset_index=subset_id,
                normed_base_shape=normed_base_shape,
                shape_error=shape_error,
                stat_error=stat_error,
                reco_ch_info=reco_ch_info,
                component_info=component,
                scale_factor=sys_shape_info.scale_factor,
            )

            axis_index_pair: Tuple[int, int] = comp_to_axis_map[component.name]

            if len(axes.shape) == 2:
                this_axis: AxesType = axes[axis_index_pair[0], axis_index_pair[1]]
            else:
                this_axis = axes[axis_index_pair[1]]

            self._plot_shape_overview(
                ax=this_axis,
                axis_position=axis_index_pair,
                is_last_row=n_plot_rows - 1 == axis_index_pair[0],
                shape_plot_info=_shape_plot_info,
            )

            _shape_plot_infos.append(_shape_plot_info)

        var_str: str = "" if number_of_subsets == 1 else f" ({subset_id})"
        title_str: str = sys_shape_info.latex_str + f" Sys. Shape Effect{var_str} in " + reco_ch_info.latex_label
        fig.suptitle(title_str, fontsize=18)

        export(
            fig=fig,
            filename=plot_file_name,
            target_dir=target_dir_path,
            close_figure=False,
        )

        return SpecificShapePlotInfoContainer(shape_plot_infos=tuple(_shape_plot_infos))

    @staticmethod
    def _plot_shape_overview(
        ax: AxesType,
        axis_position: Tuple[int, int],
        is_last_row: bool,
        shape_plot_info: SpecificShapePlotInfoEntry,
    ) -> None:
        """Draw the up and down variation of one component onto ``ax``.

        The shape error and its negative are drawn as step histograms over the
        bin index, together with a dashed reference line at zero. The legend is
        only added to the axis at position (0, 0), the y-label to axes in the
        first column and the x-label to axes in the last row.
        """
        assert len(shape_plot_info.shape_error.shape) == 1, shape_plot_info.shape_error.shape

        # Half-integer edges, so that bin i is centred on the integer i.
        bin_edges: np.ndarray = np.arange(shape_plot_info.shape_error.shape[0] + 1) - 0.5
        bin_mids: np.ndarray = (bin_edges[1:] + bin_edges[:-1]) / 2.0

        up_sys: np.ndarray = +1.0 * shape_plot_info.shape_error
        down_sys: np.ndarray = -1.0 * shape_plot_info.shape_error

        hist_plot_infos: List[Tuple[str, np.ndarray, str]] = [
            (KITColors.kit_blue, up_sys, "Up/Nom"),
            (KITColors.kit_green, down_sys, "Down/Nom"),
        ]

        ax.hlines(y=0.0, xmin=bin_edges[0], xmax=bin_edges[-1], linestyles="--", lw=1.0, color=KITColors.kit_black)

        for h_color, h_weights, h_label in hist_plot_infos:
            # One entry per bin, weighted with the variation: the histogram
            # reproduces the values of `h_weights` bin by bin.
            ax.hist(
                x=bin_mids,
                bins=bin_edges,
                weights=h_weights,
                stacked=False,
                color=h_color,
                lw=0.8,
                label=h_label,
                histtype="step",
            )

        ax.set_title(shape_plot_info.component_info.latex_label, fontsize=14)

        if axis_position == (0, 0):
            ax.legend(frameon=False, loc=1, ncol=2, fontsize=6)

        if axis_position[1] == 0:
            ax.set_ylabel("Relative Variations", fontsize=12)
        if is_last_row:
            ax.set_xlabel("bins", fontsize=12)