Skip to content

Commit 3eefce0

Browse files
gartrog authored and hageboeck committed
New RooAbsData::split function for smaller datasets in categories
1 parent cf61ac8 commit 3eefce0

File tree

3 files changed

+116
-1
lines changed

3 files changed

+116
-1
lines changed

roofit/roofitcore/inc/RooAbsData.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class RooAbsReal ;
3232
class RooRealVar;
3333
class RooAbsRealLValue;
3434
class RooAbsCategory ;
35+
class RooSimultaneous ;
3536
class RooAbsCategoryLValue;
3637
class Roo1DTable ;
3738
class RooPlot;
@@ -225,6 +226,9 @@ class RooAbsData : public TNamed, public RooPrintable {
225226
// Split a dataset by a category
226227
virtual TList* split(const RooAbsCategory& splitCat, Bool_t createEmptyDataSets=kFALSE) const ;
227228

229+
// Split a dataset by categories of a RooSimultaneous
230+
virtual TList* split(const RooSimultaneous& simpdf, Bool_t createEmptyDataSets=kFALSE) const ;
231+
228232
// Fast splitting for SimMaster setData
229233
Bool_t canSplitFast() const ;
230234
RooAbsData* getSimData(const char* idxstate) ;

roofit/roofitcore/src/RooAbsData.cxx

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ observable snapshots are stored in the dataset.
9898
#include "RooCategory.h"
9999
#include "RooTrace.h"
100100
#include "RooUniformBinning.h"
101+
#include "RooSimultaneous.h"
101102

102103
#include "RooRealVar.h"
103104
#include "RooGlobalFunc.h"
@@ -1596,6 +1597,116 @@ TList* RooAbsData::split(const RooAbsCategory& splitCat, Bool_t createEmptyDataS
15961597
return dsetList;
15971598
}
15981599

1600+
////////////////////////////////////////////////////////////////////////////////
1601+
/// Split dataset into subsets based on the categorisation of the RooSimultaneous
1602+
/// A TList of RooDataSets is returned in which each RooDataSet is named
1603+
/// after the state name of splitCat of which it contains the dataset subset.
1604+
/// The observables splitCat itself is no longer present in the sub datasets, as well as the
1605+
/// observables of the other categories.
1606+
/// If createEmptyDataSets is kFALSE (default) this method only creates datasets for states
1607+
/// which have at least one entry The caller takes ownership of the returned list and its contents
1608+
1609+
TList* RooAbsData::split(const RooSimultaneous& simpdf, Bool_t createEmptyDataSets) const
1610+
{
1611+
RooAbsCategoryLValue& splitCat = const_cast<RooAbsCategoryLValue&>(simpdf.indexCat());
1612+
1613+
// Sanity check
1614+
if (!splitCat.dependsOn(*get())) {
1615+
coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") ERROR category " << splitCat.GetName()
1616+
<< " doesn't depend on any variable in this dataset" << endl ;
1617+
return 0 ;
1618+
}
1619+
1620+
// Clone splitting category and attach to self
1621+
RooAbsCategory* cloneCat =0;
1622+
RooArgSet* cloneSet = 0;
1623+
if (splitCat.isDerived()) {
1624+
cloneSet = (RooArgSet*) RooArgSet(splitCat).snapshot(kTRUE) ;
1625+
if (!cloneSet) {
1626+
coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") Couldn't deep-clone splitting category, abort." << endl ;
1627+
return 0 ;
1628+
}
1629+
cloneCat = (RooAbsCategory*) cloneSet->find(splitCat.GetName()) ;
1630+
cloneCat->attachDataSet(*this) ;
1631+
} else {
1632+
cloneCat = dynamic_cast<RooAbsCategory*>(get()->find(splitCat.GetName())) ;
1633+
if (!cloneCat) {
1634+
coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") ERROR category " << splitCat.GetName()
1635+
<< " is fundamental and does not appear in this dataset" << endl ;
1636+
return 0 ;
1637+
}
1638+
}
1639+
1640+
// Split a dataset in a series of subsets, each corresponding
1641+
// to a state of splitCat
1642+
TList* dsetList = new TList ;
1643+
1644+
// Construct set of variables to be included in split sets = full set - split category
1645+
RooArgSet subsetVars(*get()) ;
1646+
if (splitCat.isDerived()) {
1647+
RooArgSet* vars = splitCat.getVariables() ;
1648+
subsetVars.remove(*vars,kTRUE,kTRUE) ;
1649+
delete vars ;
1650+
} else {
1651+
subsetVars.remove(splitCat,kTRUE,kTRUE) ;
1652+
}
1653+
1654+
// Add weight variable explicitly if dataset has weights, but no top-level weight
1655+
// variable exists (can happen with composite datastores)
1656+
Bool_t addWV(kFALSE) ;
1657+
RooRealVar newweight("weight","weight",-1e9,1e9) ;
1658+
if (isWeighted() && !IsA()->InheritsFrom(RooDataHist::Class())) {
1659+
subsetVars.add(newweight) ;
1660+
addWV = kTRUE ;
1661+
}
1662+
1663+
// By default, remove all category observables from the subdatasets
1664+
RooArgSet allObservables;
1665+
for( const auto& catPair : splitCat) {
1666+
const auto& catPdf = simpdf.getPdf(catPair.first.c_str());
1667+
allObservables.add(*(catPdf->getObservables(this)));
1668+
}
1669+
subsetVars.remove(allObservables, kTRUE, kTRUE);
1670+
1671+
1672+
// If createEmptyDataSets is true, prepopulate with empty sets corresponding to all states
1673+
if (createEmptyDataSets) {
1674+
for (const auto& nameIdx : *cloneCat) {
1675+
// Add in the subset only the observables corresponding to this category
1676+
RooArgSet subsetVarsCat(subsetVars);
1677+
const auto& catPdf = simpdf.getPdf(nameIdx.first.c_str());
1678+
subsetVarsCat.add(*(catPdf->getObservables(this)));
1679+
1680+
RooAbsData* subset = emptyClone(nameIdx.first.c_str(), nameIdx.first.c_str(), &subsetVarsCat,(addWV?"weight":0)) ;
1681+
dsetList->Add((RooAbsArg*)subset) ;
1682+
}
1683+
}
1684+
1685+
1686+
// Loop over dataset and copy event to matching subset
1687+
const bool propWeightSquared = isWeighted();
1688+
for (Int_t i = 0; i < numEntries(); ++i) {
1689+
const RooArgSet* row = get(i);
1690+
RooAbsData* subset = (RooAbsData*) dsetList->FindObject(cloneCat->getCurrentLabel());
1691+
if (!subset) {
1692+
// Add in the subset only the observables corresponding to this category
1693+
RooArgSet subsetVarsCat(subsetVars);
1694+
const auto& catPdf = simpdf.getPdf(cloneCat->getCurrentLabel());
1695+
subsetVarsCat.add(*(catPdf->getObservables(this)));
1696+
subset = emptyClone(cloneCat->getCurrentLabel(),cloneCat->getCurrentLabel(),&subsetVarsCat,(addWV?"weight":0));
1697+
dsetList->Add((RooAbsArg*)subset);
1698+
}
1699+
if (!propWeightSquared) {
1700+
subset->add(*row, weight());
1701+
} else {
1702+
subset->add(*row, weight(), weightSquared());
1703+
}
1704+
}
1705+
1706+
delete cloneSet;
1707+
return dsetList;
1708+
}
1709+
15991710
////////////////////////////////////////////////////////////////////////////////
16001711
/// Plot dataset on specified frame.
16011712
///

roofit/roofitcore/src/RooAbsTestStatistic.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ void RooAbsTestStatistic::initSimMode(RooSimultaneous* simpdf, RooAbsData* data,
459459
RooAbsCategoryLValue& simCat = const_cast<RooAbsCategoryLValue&>(simpdf->indexCat());
460460

461461
TString simCatName(simCat.GetName());
462-
TList* dsetList = const_cast<RooAbsData*>(data)->split(simCat,processEmptyDataSets());
462+
TList* dsetList = const_cast<RooAbsData*>(data)->split(*simpdf,processEmptyDataSets());
463463
if (!dsetList) {
464464
coutE(Fitting) << "RooAbsTestStatistic::initSimMode(" << GetName() << ") ERROR: index category of simultaneous pdf is missing in dataset, aborting" << endl;
465465
throw std::runtime_error("RooAbsTestStatistic::initSimMode() ERROR, index category of simultaneous pdf is missing in dataset, aborting");

0 commit comments

Comments
 (0)