import basf2
import modularAnalysis as ma
from typing import List
from variables import variables as vm
import pandas as pd
from vibe.core.utils.misc import fancy_validation_mode_header
from vibe.core.validation_mode import ValidationModeBaseClass
from vibe.core.helper.skims.skim_attribute_tools import SkimAttributes, AnalysisParameters
from vibe.core.helper.histogram_tools import HistVariable, Histogram, HistComponent
from vibe.core.helper.root_helper import makeROOTCompatible
__all__ = [
"TestOfflineSkim",
]
@fancy_validation_mode_header
class TestOfflineSkim(ValidationModeBaseClass):
"""
An example template for a full skim->validation pipeline in vibe.
Parameters
----------
name : str
        Unique name of your skim mode class. This must match a key inside the skim_production_modes.JSON file.
NOTE
----
    * This template, in conjunction with the skim_production_modes.JSON file, allows the user to submit any number of skims over any number of MC types.
* The create_basf2_path function is used to construct the basf2 path for analysis reconstruction and validation of the skimmed MC.
    * User customisation happens through the get_skim_attributes method, in which the user must fill out the SkimAttributes named tuple as shown below.
skim_name :
        The skim_name property must be at least a string containing a valid skim name (this is checked at the beginning of run time). However, skim_name can also be a list of skim names, in which case multiple skims are run at once over the same MC. This is useful, for example, for the FEI, combining feiSL and feiHadronic such that:
            skim_name = ['feiSL', 'feiHadronic']
globaltag :
        The globaltag is an optional property. If the user requires a specific globaltag for their skims then it is vital that they input it here. If None is given, the most up-to-date globaltag will be used.
grid_test_mode :
        If set to True, this flag reduces the number of events per grid job to 100. Note that, for skims with low retention, there is a chance that no ntuple will be saved to the grid if the retention threshold is not met.
skim_to_analysis_pipeline :
        This variable can be False for pure skimming. For a full pipeline, however, it must return an AnalysisParameters() named tuple, which the user can fill with kwargs as shown below if required.
        Note that AnalysisParameters also has a 'globaltag' field, which accepts a list of globaltags to be attached to the analysis basf2 path.
"""
name = "2offlineSkimExample"
def get_skim_attributes(self):
return SkimAttributes(
# [REQUIRED] List of reconstruction types to use on a given lpn
skim_name = ['BtoXll', 'BtoXgamma'],
# [OPTIONAL] Globaltag
globaltag = 'analysis_tools_light-2305-korat',
# [OPTIONAL] When True set the number of events to be reconstructed to 100 (Decrease runtime for testing)
grid_test_mode = True,
            # [OPTIONAL] If not None, the full pipeline is engaged and AnalysisParameters is the space for the user to
            # configure the analysis. This allows the user to add globaltags and kwargs to the dataset_dict inside
            # skim_production_modes.json, which is automatically produced and updated during runtime
skim_to_analysis_pipeline = AnalysisParameters(
                kwargs = {
"BtoXll" : {"treename" : "B+:xll"},
"BtoXgamma" : {"treename" : "B+:gamma"}
}
)
)
    # The user can make this analysis reconstruction whatever they please; any keyword arguments used here
    # must be added to the "kwargs" property of AnalysisParameters
    def create_basf2_path(self, treename: str):
main_path = basf2.Path()
ma.matchMCTruth(
treename,
path=main_path,
)
        # build the rest of event (ROE) around the selected signal candidates
ma.buildRestOfEvent(treename, path=main_path)
# define ROE mask cuts
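        # d0 and z0 are the track impact parameters (in cm); the cluster cuts apply energy and timing
        # requirements per ECL region (clusterReg: 1 = forward endcap, 2 = barrel, 3 = backward endcap)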
tracksCut = "abs(d0) < 10.0 and abs(z0) < 20.0"
        clustersCut = (
            "[clusterE1E9 > 0.4 or E > 0.075] and "
            "[[E > 0.062 and abs(clusterTiming)<18 and clusterReg==1] or "
            "[E > 0.060 and abs(clusterTiming)<20 and clusterReg==2] or "
            "[E > 0.056 and abs(clusterTiming)<44 and clusterReg==3]]"
        )
# define and append ROE masks
m1 = ("m1", tracksCut, clustersCut)
ma.appendROEMasks(treename, [m1], path=main_path)
ma.rankByHighest(
treename,
"extraInfo(sigProb)",
#allowMultiRank=True,
outputVariable="sigProbRank",
path=main_path,
)
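        # event shape (e.g. foxWolframR2) and event kinematics variables are built from the particles in the signal list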
ma.buildEventShape(inputListNames=[treename], path=main_path)
ma.buildEventKinematics(inputListNames=[treename], path=main_path)
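        # aliases map the verbose basf2 variable names onto the branch names used in the ntuple and histograms below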
vm.addAlias('sigProbRank', 'extraInfo(sigProbRank)')
vm.addAlias('cosThetaBY', 'cosThetaBetweenParticleAndNominalB')
vm.addAlias("dmID", "extraInfo(decayModeID)")
self.variables_to_validation_ntuple(
decay_str=treename,
            variables=[
                "cosThetaBY",
                "isSignalAcceptMissingNeutrino",
                "sigProbRank",
                "dmID",
                "Mbc",
                "foxWolframR2",
            ],
path=main_path,
)
return main_path
@property
def analysis_validation_histograms(self) -> List[Histogram]:
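        # Each Histogram below is filled from the validation ntuple branches written in create_basf2_path;
        # a HistComponent with an additional_cut_str overlays the subset of candidates passing that cut
        # (e.g. rank-1 candidates only), while one without a cut shows all candidates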
return [
Histogram(
name='cosThetaBY',
title=r"cos$\theta_{BY}$",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="cosThetaBY"),
label=r"cos$\theta_{BY}$",
unit=r"",
bins=50,
scope=(-5, 5),
),
                hist_components=[
                    HistComponent(
                        label="rank1 B's",
                        additional_cut_str="sigProbRank==1",
                    ),
                ]
),
Histogram(
name='cosThetaBY_allVSrank1',
title=r"cos$\theta_{BY}$: rank 1 vs rest",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="cosThetaBY"),
label=r"cos$\theta_{BY}$",
unit=r"",
bins=50,
scope=(-5, 5),
),
                hist_components=[
                    HistComponent(
                        label="all ranks",
                    ),
                    HistComponent(
                        label="rank1 B's",
                        additional_cut_str="sigProbRank==1",
                    ),
                ]
),
Histogram(
name='Mbc',
title=r"B^0 Mbc",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="Mbc"),
label=r"Mbc",
unit=r"GeV/c^2",
bins=50,
scope=(2.5, 7.5),
),
                hist_components=[]
),
Histogram(
name='dmID',
title=r"B^0 dmID",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable=f"dmID"),
label=r"dmID",
unit=r"",
bins=20,
scope=(0,20),
),
hist_components=[]
),
Histogram(
name='foxWolframR2',
title=r"foxWolframR2",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="foxWolframR2"),
label=r"foxWolframR2",
unit=r"",
bins=10,
scope=(0,1),
),
hist_components=[]
)
]
    def get_number_of_signal_for_efficiency(self, df: pd.DataFrame) -> float:
        """Count the true signal candidates (isSignalAcceptMissingNeutrino == 1) in the validation ntuple."""
        return df["isSignalAcceptMissingNeutrino"].sum()
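# For a quick offline cross-check (illustrative only: the file and tree names below are hypothetical),
# the ntuple branches written above can be inspected directly with uproot/pandas:
#
#   import uproot
#   df = uproot.open("validation_ntuple.root")["B+:xll"].arrays(library="pd")
#   print("rank-1 candidates:", (df["sigProbRank"] == 1).sum())
#   print("signal candidates:", df["isSignalAcceptMissingNeutrino"].sum())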