Skip to content

Commit 764325a

Browse files
committed
Loading only the 28 variables we are interested in
1 parent ba03966 commit 764325a

File tree

1 file changed

+25
-6
lines changed

1 file changed

+25
-6
lines changed

data_loader.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,38 @@ def load_cms_data(filename="open_cms_data.root"):
1515
# The Collection we want is: recoPFJets_ak5PFJets__RECO
1616

1717
recoPFJets = events_tree['recoPFJets_ak5PFJets__RECO.']['recoPFJets_ak5PFJets__RECO.obj']
18-
recoPFJets.show(name_width=100, typename_width=100)
18+
#recoPFJets.show(name_width=100, typename_width=100)
1919

2020
prefix = 'recoPFJets_ak5PFJets__RECO.obj.'
21-
# Store the data in a pandas dataframe
21+
# Store the 28 variables of interest in a pandas dataframe
2222
dataframe = recoPFJets.arrays(
23-
[prefix + 'qx3_', prefix + 'pt_', prefix + 'eta_', prefix + 'phi_', prefix + 'mass_'],
24-
library="pd")
23+
[prefix + 'pt_', prefix + 'eta_', prefix + 'phi_', prefix + 'mass_', prefix + 'vertex_.fCoordinates.fX',
24+
prefix + 'vertex_.fCoordinates.fY', prefix + 'vertex_.fCoordinates.fZ', prefix + 'mJetArea', prefix + 'mPileupEnergy',
25+
prefix + 'm_specific.mChargedHadronEnergy', prefix + 'm_specific.mNeutralHadronEnergy',
26+
prefix + 'm_specific.mPhotonEnergy', prefix + 'm_specific.mElectronEnergy',
27+
prefix + 'm_specific.mMuonEnergy', prefix + 'm_specific.mHFHadronEnergy', prefix + 'm_specific.mHFEMEnergy',
28+
prefix + 'm_specific.mChargedHadronMultiplicity', prefix + 'm_specific.mNeutralHadronMultiplicity',
29+
prefix + 'm_specific.mPhotonMultiplicity', prefix + 'm_specific.mElectronMultiplicity', prefix + 'm_specific.mMuonMultiplicity',
30+
prefix + 'm_specific.mHFHadronMultiplicity', prefix + 'm_specific.mHFEMMultiplicity',
31+
prefix + 'm_specific.mChargedEmEnergy', prefix + 'm_specific.mChargedMuEnergy', prefix + 'm_specific.mNeutralEmEnergy',
32+
prefix + 'm_specific.mChargedMultiplicity', prefix + 'm_specific.mNeutralMultiplicity'], library="pd")
33+
34+
prefix2 = 'ak5PFJets.'
35+
# Rename the column names to be shorter
36+
dataframe.columns = [prefix2 + 'pt_', prefix2 + 'eta_', prefix2 + 'phi_', prefix2 + 'mass_',
37+
prefix2 + 'fX', prefix2 + 'fY', prefix2 + 'fZ', prefix2 + 'mJetArea', prefix2 + 'mPileupEnergy',
38+
prefix2 + 'mChargedHadronEnergy', prefix2 + 'mNeutralHadronEnergy', prefix2 + 'mPhotonEnergy',
39+
prefix2 + 'mElectronEnergy', prefix2 + 'mMuonEnergy', prefix2 + 'mHFHadronEnergy',
40+
prefix2 + 'mHFEMEnergy', prefix2 + 'mChargedHadronMultiplicity', prefix2 + 'mNeutralHadronMultiplicity',
41+
prefix2 + 'mPhotonMultiplicity', prefix2 + 'mElectronMultiplicity', prefix2 + 'mMuonMultiplicity',
42+
prefix2 + 'mHFHadronMultiplicity', prefix2 + 'mHFEMMultiplicity', prefix2 + 'mChargedEmEnergy',
43+
prefix2 + 'mChargedMuEnergy', prefix2 + 'mNeutralEmEnergy', prefix2 + 'mChargedMultiplicity',
44+
prefix2 + 'mNeutralMultiplicity']
2545

26-
dataframe.columns = ['qx3_', 'pt_', 'eta_', 'phi_', 'mass_']
2746

2847
print("\nDataframe:")
2948
print(dataframe.head())
30-
49+
dataframe.to_csv('27D_opensCMS_data.csv')
3150
return dataframe
3251

3352

0 commit comments

Comments
 (0)