import basf2
import modularAnalysis as ma
from typing import List
from variables import variables as vm
import pandas as pd
from vibe.core.utils.misc import fancy_validation_mode_header
from vibe.core.validation_mode import ValidationModeBaseClass
from vibe.core.helper.skims.skim_attribute_tools import SkimAttributes, AnalysisParameters
from vibe.core.helper.histogram_tools import HistVariable, Histogram, HistComponent
from vibe.core.helper.root_helper import makeROOTCompatible
__all__ = [
"TestOfflineSkim",
]
@fancy_validation_mode_header
class TestOfflineSkim(ValidationModeBaseClass):
"""
An example template for a full skim->validation pipeline in vibe.
Parameters
----------
name : str
        Unique name of your skim mode class. This must match a key inside the skim_production_modes.JSON file.
NOTE
----
    * This template, in conjunction with the skim_production_modes.JSON file, allows the user to submit any number of skims over any number of MC types.
* The create_basf2_path function is used to construct the basf2 path for analysis reconstruction and validation of the skimmed MC.
    * User customisation happens through the get_skim_attributes method, in which the user must fill out the SkimAttributes named tuple as shown below.
skim_name :
        The skim_name property must be at least a string containing a valid skim name (this is checked at the beginning of run time). However, skim_name can also be a list of skim names, in which case multiple skims are run at once over the same MC. This is useful, for example, for the FEI, combining feiSL and feiHadronic such that:
            skim_name = ['feiSL', 'feiHadronic']
globaltag :
        The globaltag is an optional property. If the user requires a specific globaltag for their skims then it is vital that they input it here. If None is given, the most up-to-date globaltag will be used.
grid_test_mode :
        If set to True, this flag reduces the number of events per grid job to 100. Note that, for skims with low retention, there is a chance that no ntuple will be saved to the grid if the retention threshold is not met.
skim_to_analysis_pipeline :
        This variable can be False for pure skimming. For a full pipeline, however, it must return an AnalysisParameters() named tuple, which the user can fill with kwargs as shown below if required.
        Note that AnalysisParameters also has a 'globaltag' field, which accepts a list of globaltags to be attached to the analysis basf2 path.
"""
name = "2offlineSkimExample"
def get_skim_attributes(self):
return SkimAttributes(
# [REQUIRED] List of reconstruction types to use on a given lpn
skim_name = ['BtoXll', 'BtoXgamma'],
# [OPTIONAL] Globaltag
globaltag = 'analysis_tools_light-2305-korat',
# [OPTIONAL] When True set the number of events to be reconstructed to 100 (Decrease runtime for testing)
grid_test_mode = True,
            # [OPTIONAL] If not None, the full pipeline is engaged and AnalysisParameters is the space for the user to
            # configure the analysis. This allows the user to add globaltags and kwargs to the dataset_dict inside
            # skim_production_modes.json, which is automatically produced and updated during runtime
skim_to_analysis_pipeline = AnalysisParameters(
                kwargs = {
"BtoXll" : {"treename" : "B+:xll"},
"BtoXgamma" : {"treename" : "B+:gamma"}
}
)
)
    # The user can make this analysis reconstruction whatever they please; any keyword arguments used here
    # must be added to the "kwargs" property of AnalysisParameters
    def create_basf2_path(self, treename: str):
main_path = basf2.Path()
ma.matchMCTruth(
treename,
path=main_path,
)
        # build the rest of event (ROE) around the selected signal candidates
ma.buildRestOfEvent(treename, path=main_path)
# define ROE mask cuts
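        # d0 and z0 are the track impact parameters (in cm); the cluster cuts apply energy and timing
        # requirements per ECL region (clusterReg: 1 = forward endcap, 2 = barrel, 3 = backward endcap)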
tracksCut = "abs(d0) < 10.0 and abs(z0) < 20.0"
        clustersCut = (
            "[clusterE1E9 > 0.4 or E > 0.075] and "
            "[[E > 0.062 and abs(clusterTiming)<18 and clusterReg==1] or "
            "[E > 0.060 and abs(clusterTiming)<20 and clusterReg==2] or "
            "[E > 0.056 and abs(clusterTiming)<44 and clusterReg==3]]"
        )
# define and append ROE masks
m1 = ("m1", tracksCut, clustersCut)
ma.appendROEMasks(treename, [m1], path=main_path)
ma.rankByHighest(
treename,
"extraInfo(sigProb)",
#allowMultiRank=True,
outputVariable="sigProbRank",
path=main_path,
)
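        # event shape (e.g. foxWolframR2) and event kinematics variables are built from the particles in the signal list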
ma.buildEventShape(inputListNames=[treename], path=main_path)
ma.buildEventKinematics(inputListNames=[treename], path=main_path)
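        # aliases map the verbose basf2 variable names onto the branch names used in the ntuple and histograms below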
vm.addAlias('sigProbRank', 'extraInfo(sigProbRank)')
vm.addAlias('cosThetaBY', 'cosThetaBetweenParticleAndNominalB')
vm.addAlias("dmID", "extraInfo(decayModeID)")
self.variables_to_validation_ntuple(
decay_str=treename,
            variables=[
                "cosThetaBY",
                "isSignalAcceptMissingNeutrino",
                "sigProbRank",
                "dmID",
                "Mbc",
                "foxWolframR2",
            ],
path=main_path,
)
return main_path
@property
def analysis_validation_histograms(self) -> List[Histogram]:
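        # Each Histogram below is filled from the validation ntuple branches written in create_basf2_path;
        # a HistComponent with an additional_cut_str overlays the subset of candidates passing that cut
        # (e.g. rank-1 candidates only), while one without a cut shows all candidates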
return [
Histogram(
name='cosThetaBY',
title=r"cos$\theta_{BY}$",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="cosThetaBY"),
label=r"cos$\theta_{BY}$",
unit=r"",
bins=50,
scope=(-5, 5),
),
                hist_components=[
                    HistComponent(
                        label="rank1 B's",
                        additional_cut_str="sigProbRank==1",
                    ),
                ]
),
Histogram(
name='cosThetaBY_allVSrank1',
title=r"cos$\theta_{BY}$: rank 1 vs rest",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="cosThetaBY"),
label=r"cos$\theta_{BY}$",
unit=r"",
bins=50,
scope=(-5, 5),
),
                hist_components=[
                    HistComponent(
                        label="all ranks",
                    ),
                    HistComponent(
                        label="rank1 B's",
                        additional_cut_str="sigProbRank==1",
                    ),
                ]
),
Histogram(
name='Mbc',
title=r"B^0 Mbc",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="Mbc"),
label=r"Mbc",
unit=r"GeV/c^2",
bins=50,
scope=(2.5, 7.5),
),
                hist_components=[]
),
Histogram(
name='dmID',
title=r"B^0 dmID",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable=f"dmID"),
label=r"dmID",
unit=r"",
bins=20,
scope=(0,20),
),
hist_components=[]
),
Histogram(
name='foxWolframR2',
title=r"foxWolframR2",
hist_variable=HistVariable(
df_label=makeROOTCompatible(variable="foxWolframR2"),
label=r"foxWolframR2",
unit=r"",
bins=10,
scope=(0,1),
),
hist_components=[]
)
]
    def get_number_of_signal_for_efficiency(self, df: pd.DataFrame) -> float:
        """Count the true signal candidates (isSignalAcceptMissingNeutrino == 1) in the validation ntuple."""
        return df["isSignalAcceptMissingNeutrino"].sum()
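# For a quick offline cross-check (illustrative only: the file and tree names below are hypothetical),
# the ntuple branches written above can be inspected directly with uproot/pandas:
#
#   import uproot
#   df = uproot.open("validation_ntuple.root")["B+:xll"].arrays(library="pd")
#   print("rank-1 candidates:", (df["sigProbRank"] == 1).sum())
#   print("signal candidates:", df["isSignalAcceptMissingNeutrino"].sum())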