Skip to content
Snippets Groups Projects
Commit 8c5abfe0 authored by Thorsten Chwalek's avatar Thorsten Chwalek
Browse files

Upload New File

parent 964fa70d
Branches
No related tags found
No related merge requests found
# Assigns an ID to each sample along with different access functions
# Author in C++: Matthias Schroeder
# matthias.schroeder@AT@desy.de
# November 2013
import glob
import numpy as np # manipulations and operations on arrays
import pandas as pd # easy access to data structures
import matplotlib.pyplot as plt # plotting
from matplotlib.lines import Line2D
import time
from scipy.stats import poisson
from scipy.stats import norm
import ast # useful for converting strings to lists
class Sample:
#path to data needs to be set manually
path = "/Unknown Path/"
# check if ID is correct
def checkId(id):
if( id > 6 or id < 0 ):
print("\n\nERROR invalid sample id ",id)
# input id output full file name
def fileNameFullSample(id):
Sample.checkId(id)
if id == 0: name = "4VectorData.csv"
if id == 7: name = "MuHadUnnested.csv"
if id == 1: name = "4VectorWJets.csv"
if id == 2: name = "4VectorTTJets.csv"
if id == 3: name = "4VectorZJets.csv"
if id == 4: name = "4VectorQCD.csv"
if id == 5: name = "4VectorLM6.csv"
if id == 6: name = "4VectorLM9.csv"
name = Sample.path + name
#for i in range(len(name)):
# name[i] = Sample.path+name[i]
return name
# input id output name of data (label) for legends in plot
def label(id):
Sample.checkId(id)
label = ""
if( id == 0 ): label += "Data"
if( id == 1 ): label += "W(l#nu)+Jets"
if( id == 2 ): label += "t#bar{t}+Jets"
if( id == 3 ): label += "Z(#nu#bar{#nu})+Jets"
if( id == 4 ): label += "QCD"
if( id == 5 ): label += "LM6"
if( id == 6 ): label += "LM9"
return label
# sample name without spaces for files
def toString(id):
Sample.checkId(id)
string = ""
if( id == 0 ): string += "Data"
if( id == 1 ): string += "WJets"
if( id == 2 ): string += "TTJets"
if( id == 3 ): string += "ZJets"
if( id == 4 ): string += "QCD"
if( id == 5 ): string += "LM6"
if( id == 6 ): string += "LM9"
return string
# assigns color to each sample for plotting
def color(id):
Sample.checkId(id)
color = 0;
if( id == 0 ): color = 'k'
if( id == 1 ): color = 'g'
if( id == 2 ): color = 'r'
if( id == 3 ): color = 'y'
if( id == 4 ): color = 'm'
if( id == 5 ): color = 'b'
if( id == 6 ): color = 'c'
return color
# returns cross section:
def CrossSection(ID):
if ID == 1:
return(30.08)
if ID == 2:
return(127.06)
if ID == 3:
return(6.26)
if ID == 4:
return(8630.0)
if ID == 5:
return(0.502)
if ID == 6:
return(9.287)
else:
return('Wrong ID: cross section not found')
# total Number of MC events prior to selection
def TotalEvents(ID):
if ID == 1:
return(6619654.)
if ID == 2:
return(1925324.)
if ID == 3:
return(51637.)
if ID == 4:
return(44443155.)
if ID == 5:
return(51282.)
if ID == 6:
return(51282.)
else:
return('Wrong ID: Total Number of MC Events not found')
# takes a dataframe or a list of dataframes and returns the maximum number of jets in the dataframe
def MaxNJets(df):
if isinstance(df, list):
out = [(len(x.columns)-2)//4 for x in df ]
elif isinstance(df, pd.DataFrame):
out = (len(df.columns)-2)//4
else:
print('MaxNJets Error: Input is not a dataframe or a list of dataframes')
return(out) # returns list of MaxNJets for the dataframes in input
# takes a dataframe or a list of dataframes and returns list of the jets as strings
def JetNames(df):
if isinstance(df, list):
out = []
for element in df:
Jets_in_df = ['Jet {}'.format(i) for i in range(Sample.MaxNJets(element))]
out.append(Jets_in_df)
elif isinstance(df, pd.DataFrame):
out = ['Jet {}'.format(i) for i in range(Sample.MaxNJets(df))]
else:
print('Jets Error: Input is not a dataframe or a list of dataframes')
return (out)
# takes a dataframe and a Jetnumber and returns needed jet as tuble
def Get_Jet(df,Jet_Number):
return(df[Sample.JetNames(df)[Jet_Number]])
# subtract arrays while ignoring NaN values
def nansubtract(x1,x2):
return(np.nansum(np.array([x1,-1*x2]),axis = 0))
def Bin_Values(Data, Bins, Weights):
binValues, binEdges = np.histogram( Data, bins = Bins ,weights = Weights)
bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
out = [ '{:.10f}'.format(a) for a in binValues]
return(out)
# Compare data with background and signal distributions
def Compare(Background, Signal, Data, Bins, Labels = None, Colors = None):
# Function Variables:
#Bkg: List of background dataframes
#Sgl: List of signal dataframes
#Data: Data Dataframe NOT a List
#Bins: list of bins for MaxNJets, Ht, and MHt in this order
#labels: list of labels in following order: [Data,Background,Signal]
#color: list of colors in following order: [Data,Background,Signal]
np.set_printoptions(formatter={'float_kind':'{:f}'.format})
fig, axs = plt.subplots(1, 3, figsize = (21.6,4.8))
# Plotting Background
Bkg_Weights = [Bkg['Weights'] for Bkg in Background]
Bkg_Njets = [Bkg['NJets'] for Bkg in Background]
Bkg_Ht = [Bkg['Ht'] for Bkg in Background]
Bkg_MHt = [Bkg['MHt']for Bkg in Background]
Bkg_labels = [Labels[i+1] for i in range(len(Background))] # +1 to aviod counting data as background
Bkg_colors = [Colors[i+1] for i in range(len(Background))]
axs[0].hist(Bkg_Njets, bins = Bins[0] , color = Bkg_colors, label = Bkg_labels,stacked=True,weights = Bkg_Weights)
axs[1].hist(Bkg_Ht , bins = Bins[1] ,color = Bkg_colors, label = Bkg_labels,stacked=True,weights = Bkg_Weights)
axs[2].hist( Bkg_MHt , bins = Bins[2] ,color = Bkg_colors, label = Bkg_labels,stacked=True,weights = Bkg_Weights)
# Plotting Signals
for signal_index,signal_dataframe in enumerate(Signal):
Signal_Weights = signal_dataframe['Weights']
Signal_Njets = signal_dataframe['NJets']
Signal_Ht = signal_dataframe['Ht']
Signal_MHt = signal_dataframe['MHt']
Signal_labels = Labels[len(Background)+1+signal_index] # note that Labels = [Data_label, Background_labels, Signal_labels]
Signal_colors = Colors[len(Background)+1+signal_index] # note that Labels = [Data_label, Background_labels, Signal_labels]
axs[0].hist(Signal_Njets,bins=Bins[0],color=Signal_colors,label=Signal_labels,stacked=False,weights=Signal_Weights,histtype=u'step')
axs[1].hist(Signal_Ht,bins=Bins[1],color=Signal_colors,label=Signal_labels,stacked=False,weights=Signal_Weights,histtype=u'step')
axs[2].hist( Signal_MHt,bins=Bins[2],color=Signal_colors,label=Signal_labels,stacked=False,weights=Signal_Weights,histtype=u'step')
# Plotting Data
for Variable_index,Variable in enumerate(['NJets','Ht','MHt']):
binValues, binEdges = np.histogram(Data[Variable], bins =Bins[Variable_index], weights = Data['Weights'])
bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
menStd = np.sqrt(binValues)
axs[Variable_index].errorbar(bincenters, binValues,color = Colors[0],yerr = np.sqrt(binValues) ,label = Labels[0] ,fmt='o')
#----Adjust--Axis---
for N_axs in [0,1,2]:
axs[N_axs].set_yscale("log")
axs[N_axs].set_ylabel('Events',fontsize=16)
axs[N_axs].legend()
axs[0].set_xlabel('N(jets)',fontsize=16)
axs[1].set_xlabel('$H_{t}$ [GeV]',fontsize=16)
axs[2].set_xlabel("$ MH_{t}[GeV] $",fontsize=16)
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment