Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ new_env_keys:
- CM_HW_NAME
- CM_ML_MODEL_*
- CM_MAX_EXAMPLES

- CM_VLLM_*
new_state_keys:
- mlperf-inference-implementation
- CM_SUT_*
Expand Down Expand Up @@ -403,9 +403,11 @@ deps:
CM_MODEL:
- llama2-70b-99
- llama2-70b-99.9
skip_if_env:
skip_if_any_env:
CM_MLPERF_CUSTOM_MODEL_PATH:
- "on"
- "on"
CM_MLPERF_INFERENCE_API_SERVER:
- "on"

## mixtral-8x7b
- tags: get,ml-model,mixtral
Expand Down Expand Up @@ -517,7 +519,7 @@ deps:
- stable-diffusion-xl

## OpenOrca for LLAMA2-70b
- tags: get,preprocessed,dataset,openorca,_validation
- tags: get,preprocessed,dataset,openorca,_validation,_mlcommons
names:
- openorca-preprocessed
enable_if_env:
Expand Down
11 changes: 8 additions & 3 deletions script/app-mlperf-inference-mlcommons-python/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def preprocess(i):
else:
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x

if env.get('CM_NETWORK_LOADGEN', '') != "lon":
if env.get('CM_NETWORK_LOADGEN', '') != "lon" and env.get('CM_MLPERF_INFERENCE_API_SERVER','')=='':
env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH')
if not env['MODEL_DIR']:
env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH')))
Expand Down Expand Up @@ -304,8 +304,13 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
scenario_extra_options + mode_extra_options + \
" --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
" --model-path " + env['MODEL_DIR']
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION']
if env.get('CM_MLPERF_INFERENCE_API_SERVER', '') != '':
env['CM_VLLM_SERVER_MODEL_NAME'] = env.get("CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct"
#env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000"
cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm "
else:
cmd += f" --model-path {env['MODEL_DIR']}"
cmd = cmd.replace("--count", "--total-sample-count")

elif "mixtral-8x7b" in env['CM_MODEL']:
Expand Down
21 changes: 14 additions & 7 deletions script/app-mlperf-inference-redhat/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,14 @@ deps:

- tags: get,git,repo
names:
inference-results
inference-code
updats_tags_from_env_with_prefix:
_repo.: CM_MLPERF_INFERENCE_RESULTS_REPO
- inference-results
- inference-code
update_tags_from_env_with_prefix:
_repo.:
- CM_MLPERF_INFERENCE_RESULTS_REPO
env:
CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO
extra_cache_tags: inference-implementation,mlperf
extra_cache_tags: results,repo,mlperf

# Post-dependencies needed to run this app, including those for power measurement
post_deps:
Expand Down Expand Up @@ -241,7 +242,12 @@ variations:
CM_MODEL: gptj-99.9

llama2-70b_:
{}
deps:
- tags: get,dataset,openorca,language-processing,original,_redhat
env:
CM_MLPERF_IMPLEMENTATION: redhat
env:
CM_VLLM_SERVER_MODEL_NAME: NousResearch/Meta-Llama-3-8B-Instruct # assigned for testing purposes only

llama2-70b-99:
group: model
Expand Down Expand Up @@ -292,10 +298,11 @@ variations:
fp32:
group: precision

r4.0_default:
r4.1-dev_default:
group: version
default: true
env:
CM_MLPERF_INFERENCE_RESULTS_REPO: https://github.com/mlcommons/inference_results_v4.0

docker:
real_run: False
29 changes: 28 additions & 1 deletion script/app-mlperf-inference-redhat/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ def preprocess(i):
run_dir = r ['run_dir']
print(run_cmd)
print(run_dir)
return {'return':1, 'error': 'Run command needs to be tested'}
env['CM_MLPERF_RUN_CMD'] = run_cmd
env['CM_RUN_DIR'] = run_dir
env['CM_RUN_CMD'] = run_cmd

return {'return':0}
#return {'return':1, 'error': 'Run command needs to be tested'}

def get_run_cmd(model, i):
env = i['env']
Expand All @@ -52,6 +57,28 @@ def get_run_cmd(model, i):
run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", "gptj-99")

return {'return': 0, 'run_cmd': run_cmd, 'run_dir': run_dir}

if "llama2" in model:
scenario = env['CM_MLPERF_LOADGEN_SCENARIO']
device = env['CM_MLPERF_DEVICE']
mode = env['CM_MLPERF_LOADGEN_MODE']
outdir = env['CM_MLPERF_OUTPUT_DIR']
mlperf_conf_path = env['CM_MLPERF_CONF']
user_conf_path = env['CM_MLPERF_USER_CONF']
api_server = env.get('CM_MLPERF_INFERENCE_API_SERVER', 'localhost:8000/v1')
api_model_name = env['CM_VLLM_SERVER_MODEL_NAME']
dataset_path = env['CM_DATASET_OPENORCA_PATH']
precision = env['CM_MLPERF_MODEL_PRECISION']
if mode == "accuracy":
accuracy_string = " --accuracy "
else:
accuracy_string = ""

run_cmd = f"python3 -u 'main.py' --scenario {scenario} --model-path {api_model_name} --api-model-name {api_model_name} --api-server {api_server} --mlperf-conf {mlperf_conf_path} {accuracy_string} --vllm --user-conf {user_conf_path} --dataset-path {dataset_path} --output-log-dir {outdir} --dtype float32 --device {device} "
submitter = "RedHat-Supermicro"
run_dir = os.path.join(env['CM_MLPERF_INFERENCE_IMPLEMENTATION_REPO'], "open", submitter, "code", model)

return {'return': 0, 'run_cmd': run_cmd, 'run_dir': run_dir}

def postprocess(i):

Expand Down
54 changes: 50 additions & 4 deletions script/get-preprocessed-dataset-openorca/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@
"openorca-original",
"dataset-original"
],
"tags": "get,dataset,original,openorca"
"tags": "get,dataset,original,openorca",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_BY_MLC": [
"on",
"yes"
]
}
},
{
"force_env_keys": [
Expand All @@ -33,7 +39,13 @@
"names": [
"inference-src"
],
"tags": "mlperf,inference,source"
"tags": "mlperf,inference,source",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_BY_MLC": [
"on",
"yes"
]
}
},
{
"tags": "get,generic-python-lib,_package.pyarrow",
Expand All @@ -54,7 +66,14 @@
]
},
{
"tags": "get,ml-model,llama2"
"tags": "get,ml-model,llama2",
"skip_if_env": {
"CM_DATASET_PREPROCESSED_BY_MLC": [
"on",
"yes"
]
}

}
],
"env": {
Expand All @@ -73,7 +92,6 @@
"uid": "5614c39cb1564d72",
"variations": {
"60": {
"default": true,
"ad": {
"dataset-original": {
"tags": "_60"
Expand All @@ -88,6 +106,7 @@
"group": "dataset-type"
},
"full": {
"default": true,
"ad": {
"dataset-original": {
"tags": "_full"
Expand All @@ -109,6 +128,33 @@
"CM_DATASET_CALIBRATION": "no"
},
"group": "dataset-type"
},
"mlcommons": {
"env": {
"CM_DATASET_PREPROCESSED_BY_MLC": "yes",
"CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/open_orca"
},
"deps": [
{
"env": {
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
"CM_EXTRACT_TO_FOLDER": "openorca-preprocessed"
},
"tags": "download-and-extract,_rclone",
"update_tags_from_env_with_prefix": {
"_url.": [
"CM_RCLONE_URL"
]
},
"force_cache": true,
"names": [
"dae"
],
"extra_cache_tags": "openorca,preprocessed,dataset"
}
]
}
},
"docker": {
Expand Down
23 changes: 16 additions & 7 deletions script/get-preprocessed-dataset-openorca/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,30 @@
def preprocess(i):

env = i['env']
inference_src = env['CM_MLPERF_INFERENCE_SOURCE']

run_dir = os.path.join(inference_src, 'language', 'llama2-70b')
model_dir = env['CM_ML_MODEL_PATH']
run_cmd = env['CM_PYTHON_BIN_WITH_PATH'] + ' processorca.py --dataset_pq_path=' + env['CM_DATASET_OPENORCA_PARQUET'] + ' --model_dir=' + model_dir +' --seqlen_limit=2048 --export_dir=' + os.path.join(os.getcwd(), "processed-openorca") + ' --num_total_samples=' + env['CM_DATASET_SIZE']
if str(env.get('CM_DATASET_PREPROCESSED_BY_MLC','')).lower() in [ "yes", "1", "true" ]:
run_dir = os.getcwd()
env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(env['CM_OPENORCA_PREPROCESSED_ROOT'], "open_orca_gpt4_tokenized_llama.sampled_24576.pkl.gz")
#run_cmd = f"gunzip -k {env['CM_DATASET_PREPROCESSED_PATH']}"
run_cmd = ''
else:
inference_src = env['CM_MLPERF_INFERENCE_SOURCE']
run_dir = os.path.join(inference_src, 'language', 'llama2-70b')
model_dir = env['CM_ML_MODEL_PATH']
run_cmd = env['CM_PYTHON_BIN_WITH_PATH'] + ' processorca.py --dataset_pq_path=' + env['CM_DATASET_OPENORCA_PARQUET'] + ' --model_dir=' + model_dir +' --seqlen_limit=2048 --export_dir=' + os.path.join(os.getcwd(), "processed-openorca") + ' --num_total_samples=' + env['CM_DATASET_SIZE']

env['CM_RUN_DIR'] = run_dir
env['CM_RUN_CMD'] = run_cmd



return {'return': 0}

def postprocess(i):
env = i['env']
env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(os.path.join(os.getcwd(), "processed-openorca", 'open_orca_gpt4_tokenized_llama.sampled_'+env['CM_DATASET_SIZE']+'.pkl'))
if str(env.get('CM_DATASET_PREPROCESSED_BY_MLC','')).lower() in [ "yes", "1", "true", "on" ]:
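pass  # CM_DATASET_PREPROCESSED_PATH is already set in preprocess (NOTE(review): preprocess does not treat "on" as enabled -- confirm)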
else:
env['CM_DATASET_PREPROCESSED_PATH'] = os.path.join(os.path.join(os.getcwd(), "processed-openorca", 'open_orca_gpt4_tokenized_llama.sampled_'+env['CM_DATASET_SIZE']+'.pkl'))

env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_PREPROCESSED_PATH']

return {'return': 0}
7 changes: 6 additions & 1 deletion script/process-mlperf-accuracy/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,12 @@
"names": [
"llama2-model"
],
"tags": "get,ml-model,llama2"
"tags": "get,ml-model,llama2",
"skip_if_env": {
"CM_MLPERF_INFERENCE_API_SERVER": [
"on"
]
}
}
],
"env": {
Expand Down
6 changes: 5 additions & 1 deletion script/process-mlperf-accuracy/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ def preprocess(i):
elif dataset == "openorca":
accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b",
"evaluate-accuracy.py")
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
if env.get('CM_VLLM_SERVER_MODEL_NAME','') == '':
checkpoint_path = env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH']
else:
checkpoint_path = env['CM_VLLM_SERVER_MODEL_NAME']
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + checkpoint_path + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \
"' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'"

elif dataset == "openorca-gsm8k-mbxp-combined":
Expand Down
2 changes: 2 additions & 0 deletions script/run-mlperf-inference-app/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ default_env:
CM_MLPERF_RUN_STYLE: test

input_mapping:
api_server: CM_MLPERF_INFERENCE_API_SERVER
backend: CM_MLPERF_BACKEND
batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE
beam_size: GPTJ_BEAM_SIZE
Expand Down Expand Up @@ -98,6 +99,7 @@ input_mapping:
sut: CM_MLPERF_INFERENCE_SUT_VARIATION
nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH
tp_size: CM_NVIDIA_TP_SIZE
vllm_model_name: CM_VLLM_SERVER_MODEL_NAME

new_state_keys:
- app_mlperf_inference_*
Expand Down