
Commit 5014bfd
cms-2016-simulated-datasets: further enriching and cleaning as in #182
Kati Lassila-Perini authored and tiborsimko committed Feb 5, 2024
1 parent 3f30eb0 commit 5014bfd
Showing 4 changed files with 31 additions and 24 deletions.
4 changes: 0 additions & 4 deletions cms-2016-simulated-datasets/code/das_json_store.py
@@ -75,7 +75,6 @@ def create(dataset, das_dir):
result_file = dataset.replace('/', '@') + ".json"
mydasgoclient(dataset, "dataset", das_dir, result_file)
mydasgoclient(dataset, "parent", das_dir, result_file)
mydasgoclient(dataset, "config", das_dir, result_file)
mydasgoclient(dataset, "release", das_dir, result_file)


@@ -88,7 +87,6 @@ def main(das_dir,
# create dirs for dataset and release
for path in [das_dir + '/dataset',
das_dir + '/parent',
das_dir + '/config',
das_dir + '/release']:
if not os.path.exists(path):
os.makedirs(path)
@@ -112,8 +110,6 @@ def main(das_dir,

def get_generator_parameters(dataset, das_dir):
"""Return generator parameters dictionary for given dataset. Not used in 2016"""
# TODO get from mcm store instead?
# and/or from xsecDB
out = get_from_deep_json(get_das_store_json(dataset, 'mcm', das_dir),
'generator_parameters')
if out:
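With the config query dropped, the DAS cache for each dataset now holds only the dataset, parent, and release answers. mydasgoclient is the repository's own wrapper and its exact behaviour is not shown in this diff; the following is a minimal sketch of the assumed pattern, calling the dasgoclient CLI directly and storing its JSON reply under DAS_DIR/<query type>/ (the query strings and file layout are assumptions).

# Hedged sketch of an assumed stand-in for mydasgoclient.
import os
import subprocess

def cache_das_query(dataset, query_type, das_dir):
    """Query DAS for one aspect of a dataset and cache the JSON reply on disk."""
    # Plain dataset queries need no prefix; 'parent' and 'release' are prefixed (assumption).
    query = f"dataset={dataset}" if query_type == "dataset" else f"{query_type} dataset={dataset}"
    out_dir = os.path.join(das_dir, query_type)
    os.makedirs(out_dir, exist_ok=True)
    result_file = dataset.replace("/", "@") + ".json"
    reply = subprocess.check_output(["dasgoclient", "-query", query, "-json"])
    with open(os.path.join(out_dir, result_file), "wb") as handle:
        handle.write(reply)

# After this commit only three query types are cached per dataset:
# for query_type in ("dataset", "parent", "release"):
#     cache_das_query(some_dataset, query_type, das_dir)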
33 changes: 21 additions & 12 deletions cms-2016-simulated-datasets/code/dataset_records.py
@@ -20,8 +20,7 @@
from requests.packages.urllib3.exceptions import InsecureRequestWarning

from categorisation import guess_title_category
from das_json_store import (get_cmssw_version_from_das, get_das_store_json,
get_generator_parameters, get_parent_dataset)
from das_json_store import (get_das_store_json, get_parent_dataset)
from eos_store import (XROOTD_URI_BASE, get_dataset_index_file_base,
get_dataset_location)
from mcm_store import (get_cmsDriver_script, get_cmssw_version_from_mcm,
@@ -37,9 +36,16 @@

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

recid_freerange_start = 30000 #FIXME not in use, using inputs/recid_info.py for now
recommended_gt = "106X_mcRun2_asymptotic_v17"
recommended_cmssw = "CMSSW_10_6_30"
collision_energy = "13TeV"
collision_type = "pp"
year_published = "2023"

LINK_INFO = {}

CONTAINERIMAGES_CACHE = {}

def get_number_events(dataset, das_dir):
"""Return number of events for the dataset."""
@@ -190,7 +196,7 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):
output_dataset = get_output_dataset_from_mcm(dataset, mcm_step_dir)
if output_dataset:
step['output_dataset'] = output_dataset
release = get_cmssw_version(dataset, das_dir, mcm_step_dir)
release = get_cmssw_version_from_mcm(dataset, mcm_step_dir)
if release:
step['release'] = release
global_tag = get_global_tag(dataset, mcm_step_dir)
@@ -253,9 +259,6 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):

info["steps"].append(step)

# reverse order of steps for provenance FIXME: order should be LHEGEN/GEN, SIM, DIGI2RAW, HLT, RECO, PAT, NANO
info['steps'].reverse()

# post-generation fix: if we have LHE step, let's modify the configuration file titles for other steps
# FIXME: is this now dublicate of the condition above?
lhe_present = False
@@ -280,6 +283,12 @@ def get_all_generator_text(dataset, das_dir, mcm_dir, conf_dir, recid_info):

return info

def populate_containerimages_cache():
"""Populate CONTAINERIMAGES cache (dataset -> system_details.container_images)"""
with open("../cms-release-info/cms_release_container_images_info.json") as f:
content = json.loads(f.read())
for key in content.keys():
CONTAINERIMAGES_CACHE[key] = content[key]

def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm_dir, conffiles_dir):
"""Create record for the given dataset."""
@@ -291,8 +300,6 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
year_created = '2016'
year_published = '2023' #
run_period = ['Run2016G', 'Run2016H'] #
global_tag = get_global_tag(dataset_full_name, mcm_dir)
release = get_cmssw_version(dataset_full_name, das_dir, mcm_dir)

additional_title = 'Simulated dataset ' + dataset + ' in ' + dataset_format + ' format for ' + year_created + ' collision data'

@@ -422,11 +429,11 @@ def create_record(dataset_full_name, doi_info, recid_info, eos_dir, das_dir, mcm
rec['run_period'] = run_period

# recomended global tag and cmssw release recommended for analysis
recommended_gt = get_recommended_global_tag_for_analysis(dataset_full_name)
recommended_cmssw = get_recommended_cmssw_for_analysis(dataset_full_name)
rec['system_details'] = {}
rec['system_details']['global_tag'] = "106X_dataRun2_v37"
rec['system_details']['release'] = "CMSSW_10_6_30"
rec['system_details']['global_tag'] = recommended_gt
rec['system_details']['release'] = recommended_cmssw
if recommended_cmssw in CONTAINERIMAGES_CACHE.keys():
rec["system_details"]["container_images"] = CONTAINERIMAGES_CACHE[recommended_cmssw]

rec['title'] = dataset_full_name
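For a 2016 UL simulated dataset the record's system_details block now ends up roughly as sketched below. The global tag and release values appear in this commit; the container_images entry is a placeholder, since the actual content of cms_release_container_images_info.json is not part of this diff.

# Illustrative shape of rec['system_details'] after this change (placeholder values as noted).
example_system_details = {
    "global_tag": "106X_mcRun2_asymptotic_v17",  # recommended_gt
    "release": "CMSSW_10_6_30",                  # recommended_cmssw
    # present only when recommended_cmssw is a key in CONTAINERIMAGES_CACHE;
    # the entry below is a placeholder, not the real cache content
    "container_images": [{"name": "cmsopendata/cmssw_10_6_30-example", "registry": "docker"}],
}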

@@ -524,6 +531,8 @@ def print_records(records):
def main(datasets, eos_dir, das_dir, mcm_dir, conffiles_dir, doi_file, recid_file):
"Do the job."

populate_containerimages_cache()

records_dir= "./outputs/records-" + dt.now().strftime("%Y-%m")
os.makedirs(records_dir, exist_ok=True)

4 changes: 2 additions & 2 deletions cms-2016-simulated-datasets/code/interface.py
@@ -91,8 +91,8 @@ def main(dataset_list,
$ python ./code/interface.py --create-das-json-store DATASET_LIST
This creates a local cache. It queries DAS (Data Aggregation Service)
for the dataset, parent, config and mcm information and store it in
DAS_DIR/{dataset/,parent/,config/,mcm/}.
for the dataset, parent, and release information and store it in
DAS_DIR/{dataset/,parent/,release/}.
\b
(It takes a lot of time to run, up to ~30 seconds / dataset)
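Since the help text now promises only three cache subdirectories, a small sanity check of the resulting layout can be useful; the directory names come from the docstring above, everything else below is an assumption rather than part of the tool.

import os

def check_das_cache(das_dir):
    """Count cached DAS answers per query type after --create-das-json-store has run."""
    for subdir in ("dataset", "parent", "release"):  # 'config' is no longer queried here
        path = os.path.join(das_dir, subdir)
        count = len(os.listdir(path)) if os.path.isdir(path) else 0
        print(f"{subdir}: {count} cached JSON files")

# check_das_cache("./das")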
14 changes: 8 additions & 6 deletions cms-2016-simulated-datasets/code/utils.py
@@ -70,6 +70,8 @@ def get_dataset_year(dataset):
'RunIIFall15MiniAODv2-PU25nsData2015v1FSQ' : 2015,
'RunIIFall15MiniAODv2-PU25nsData2015v1Raw' : 2015,
'RunIISummer16MiniAODv2-PUMoriond17' : 2016,
'RunIISummer20UL16NanoAODv9' : 2016,
'RunIISummer20UL16MiniAODv2' : 2016
}.get(second_name, 0)


@@ -128,24 +130,24 @@ def get_author_list_recid(dataset):


def get_recommended_global_tag_for_analysis(dataset):
"Return recomended global tag for analysis."
"Return recommended global tag for analysis."
year = get_dataset_year(dataset)
return {
2010: 'START42_V17B::All',
2011: 'START53_LV6A1::All',
2012: 'START53_V27::All',
2015: '', # FIXME
2016: '', # FIXME
2015: '76X_mcRun2_asymptotic_RunIIFall15DR76_v1',
2016: '106X_mcRun2_asymptotic_v17',
}.get(year, '')


def get_recommended_cmssw_for_analysis(dataset):
"Return recomended CMSSW release for analysis."
"Return recommended CMSSW release for analysis for the main pp data."
year = get_dataset_year(dataset)
return {
2010: 'CMSSW_4_2_8',
2011: 'CMSSW_5_3_32',
2012: 'CMSSW_5_3_32',
2015: '', # FIXME
2016: '', # FIXME
2015: 'CMSSW_7_6_7',
2016: 'CMSSW_10_6_30',
}.get(year, '')
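With the new RunIISummer20UL16 entries and the filled-in 2015/2016 recommendations, a UL16 dataset now resolves to concrete values. The example dataset name below, and the way get_dataset_year() extracts the 'RunIISummer20UL16MiniAODv2' token from it, are assumptions since that parsing is not shown in this diff; only the returned values come from the tables above.

# Assumed dataset name for illustration.
dataset = "/DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIISummer20UL16MiniAODv2-106X_mcRun2_asymptotic_v17-v1/MINIAODSIM"

# Expected lookups after this commit:
#   get_dataset_year(dataset)                        -> 2016
#   get_recommended_global_tag_for_analysis(dataset) -> '106X_mcRun2_asymptotic_v17'
#   get_recommended_cmssw_for_analysis(dataset)      -> 'CMSSW_10_6_30'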
