Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for v4.1: Add new seeds + update checker + update compliance test table #1736

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions compliance/nvidia/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ The `run_verification.py` found in each test directory will copy the test files
| 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) |
| rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) |
| gpt-j | - |
| stable-diffusion-xl | - |
| Llama2-70b | [TEST06]() |
| stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) |
| Llama2-70b | [TEST06](./TEST06/) |
| mixtral-8x7b | [TEST06](./TEST06/) |
12 changes: 6 additions & 6 deletions mlperf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ stable-diffusion-xl.*.performance_sample_count_override = 5000
3d-unet.*.performance_sample_count_override = 0

# Set seeds. The seeds will be distributed two weeks before the submission.
*.*.qsl_rng_seed = 13281865557512327830
*.*.sample_index_rng_seed = 198141574272810017
*.*.schedule_rng_seed = 7575108116881280410
*.*.qsl_rng_seed = 3066443479025735752
*.*.sample_index_rng_seed = 10688027786191513374
*.*.schedule_rng_seed = 14962580496156340209
# Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
*.*.test05_qsl_rng_seed = 2376919268182438552
*.*.test05_sample_index_rng_seed = 11176391829184272374
*.*.test05_schedule_rng_seed = 3911940905271271337
*.*.test05_qsl_rng_seed = 16799458546791641818
*.*.test05_sample_index_rng_seed = 5453809927556429288
*.*.test05_schedule_rng_seed = 5435552105434836064


*.SingleStream.target_latency_percentile = 90
Expand Down
2 changes: 1 addition & 1 deletion text_to_image/tools/sample_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def get_args():
"--n", type=int, default=10, help="Dataset download location"
)
parser.add_argument(
"--seed", "-s", type=int, default=926019364, help="Dataset download location"
"--seed", "-s", type=int, default=633994880, help="Dataset download location"
)
args = parser.parse_args()
return args
Expand Down
20 changes: 10 additions & 10 deletions text_to_image/tools/sample_ids.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
4459
4015
2705
1682
4048
4683
3757
1578
3319
95
4655
2569
1303
109
4509
3009
2179
1826
2094
3340
234 changes: 219 additions & 15 deletions tools/submission/submission_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,169 @@
"stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1}
},
},
"v4.1": {
"models": [
"resnet",
"retinanet",
"bert-99",
"bert-99.9",
"dlrm-v2-99",
"dlrm-v2-99.9",
"3d-unet-99",
"3d-unet-99.9",
"gptj-99",
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl",
"mixtral-8x7b"
],
"required-scenarios-datacenter": {
"resnet": ["Server", "Offline"],
"retinanet": ["Server", "Offline"],
"bert-99": ["Server", "Offline"],
"bert-99.9": ["Server", "Offline"],
"dlrm-v2-99": ["Server", "Offline"],
"dlrm-v2-99.9": ["Server", "Offline"],
"3d-unet-99": ["Offline"],
"3d-unet-99.9": ["Offline"],
"gptj-99": ["Server", "Offline"],
"gptj-99.9": ["Server", "Offline"],
"llama2-70b-99": ["Server", "Offline"],
"llama2-70b-99.9": ["Server", "Offline"],
"stable-diffusion-xl": ["Server", "Offline"],
"mixtral-8x7b": ["Server", "Offline"]
},
"optional-scenarios-datacenter": {},
"required-scenarios-edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
"retinanet": ["SingleStream", "MultiStream", "Offline"],
"bert-99": ["SingleStream", "Offline"],
"3d-unet-99": ["SingleStream", "Offline"],
"3d-unet-99.9": ["SingleStream", "Offline"],
"gptj-99": ["SingleStream", "Offline"],
"gptj-99.9": ["SingleStream", "Offline"],
"stable-diffusion-xl": ["SingleStream", "Offline"],
},
"optional-scenarios-edge": {},
"required-scenarios-datacenter-edge": {
"resnet": ["SingleStream", "Offline", "MultiStream", "Server"],
"retinanet": ["SingleStream", "Offline", "MultiStream", "Server"],
"bert-99": ["SingleStream", "Offline", "Server"],
"bert-99.9": ["Offline", "Server"],
"dlrm-v2-99": ["Offline", "Server"],
"dlrm-v2-99.9": ["Offline", "Server"],
"3d-unet-99": ["SingleStream", "Offline"],
"3d-unet-99.9": ["SingleStream", "Offline"],
"gptj-99": ["SingleStream", "Offline", "Server"],
"gptj-99.9": ["SingleStream", "Offline", "Server"],
"llama2-70b-99": ["Server", "Offline"],
"llama2-70b-99.9": ["Server", "Offline"],
"stable-diffusion-xl": ["SingleStream", "Offline", "Server"],
"mixtral-8x7b": ["Server", "Offline"]
},
"optional-scenarios-datacenter-edge": {},
"accuracy-target": {
"resnet": ("acc", 76.46 * 0.99),
"retinanet": ("mAP", 37.55 * 0.99),
"bert-99": ("F1", 90.874 * 0.99),
"bert-99.9": ("F1", 90.874 * 0.999),
"dlrm-v2-99": ("AUC", 80.31 * 0.99),
"dlrm-v2-99.9": ("AUC", 80.31 * 0.999),
"3d-unet-99": ("DICE", 0.86170 * 0.99),
"3d-unet-99.9": ("DICE", 0.86170 * 0.999),
"gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9),
"gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9),
"llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9),
"llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9),
"stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758),
# TODO: Mixtral metrics
# "mixtral-8x7b" : ("ROUGE1", X * 0.99, "ROUGE2", X * 0.99, "ROUGEL", X * 0.99, "TOKENS_PER_SAMPLE", X * 0.9, "gsm8k_accuracy": 73.78*0.99, "mbxp_accuracy": 60.12 * 0.99),
},
"accuracy-upper-limit": {
"stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626),
"llama2-70b-99" : ("TOKENS_PER_SAMPLE", 294.45*1.1),
"llama2-70b-99.9" : ("TOKENS_PER_SAMPLE", 294.45*1.1)
# "mixtral-8x7b" :("TOKENS_PER_SAMPLE", X * 0.9)
},
"performance-sample-count": {
"resnet": 1024,
"retinanet": 64,
"bert-99": 10833,
"bert-99.9": 10833,
"dlrm-v2-99": 204800,
"dlrm-v2-99.9": 204800,
"3d-unet-99": 43,
"3d-unet-99.9": 43,
"gptj-99": 13368,
"gptj-99.9": 13368,
"llama2-70b-99": 24576,
"llama2-70b-99.9": 24576,
"stable-diffusion-xl": 5000,
"mixtral-8x7b": 15000,
},
# TODO: Update this list.
"model_mapping": {
# map model names to the official mlperf model class
"ssd-resnet34": "retinanet",
"mobilenet": "resnet",
"resnet50": "resnet"
},
"seeds": {
# v4.1 seeds — must stay in sync with the values in mlperf.conf.
"qsl_rng_seed": 3066443479025735752,
"sample_index_rng_seed": 10688027786191513374,
"schedule_rng_seed": 14962580496156340209,
},
"test05_seeds": {
# v4.1 TEST05 seeds — must stay in sync with the values in mlperf.conf.
"qsl_rng_seed": 16799458546791641818,
"sample_index_rng_seed": 5453809927556429288,
"schedule_rng_seed": 5435552105434836064,
},
"ignore_errors": [],
"latency-constraint": {
"resnet": {"Server": 15000000},
"retinanet": {"Server": 100000000},
"bert-99": {"Server": 130000000},
"bert-99.9": {"Server": 130000000},
"dlrm-v2-99": {"Server": 60000000},
"dlrm-v2-99.9": {"Server": 60000000},
"gptj-99": {"Server": 20000000000},
"gptj-99.9": {"Server": 20000000000},
"llama2-70b-99": {"Server": 20000000000},
"llama2-70b-99.9": {"Server": 20000000000},
"stable-diffusion-xl" : {"Server": 20000000000}
# TODO: Mixtral metrics
# "mixtral-8x7b" : {"Server": 20000000000}
},
"min-queries": {
"resnet": {
"SingleStream": 1024,
"MultiStream": 270336,
"Server": 270336,
"Offline": 1,
},
"retinanet": {
"SingleStream": 1024,
"MultiStream": 270336,
"Server": 270336,
"Offline": 1,
},
"bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"dlrm-v2-99": {"Server": 270336, "Offline": 1},
"dlrm-v2-99.9": {"Server": 270336, "Offline": 1},
"3d-unet-99": {"SingleStream": 1024, "Offline": 1},
"3d-unet-99.9": {"SingleStream": 1024, "Offline": 1},
"gptj-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"gptj-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"llama2-70b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"llama2-70b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
},
},
}

VALID_DIVISIONS = ["open", "closed", "network"]
Expand Down Expand Up @@ -221,6 +384,20 @@
"3319",
"95"
]
},
"v4.1": {
"images": [
"4655",
"2569",
"1303",
"109",
"4509",
"3009",
"2179",
"1826",
"2094",
"3340"
]
}
}
}
Expand Down Expand Up @@ -255,7 +432,8 @@
"gptj-99.9": 13368,
"llama2-70b-99": 24576,
"llama2-70b-99.9": 24576,
"stable-diffusion-xl": 5000
"stable-diffusion-xl": 5000,
"mixtral-8x7b": 15000
}

SCENARIO_MAPPING = {
Expand Down Expand Up @@ -302,8 +480,8 @@
},
"v4.1": {
"llama2-70b-99": {
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
},
"llama2-70b-99.9": {
"Offline": "result_tokens_per_second",
Expand All @@ -316,16 +494,33 @@
"gptj-99.9": {
"Offline": "result_inferred_tokens_per_second",
"Server": "result_inferred_completed_tokens_per_second",
},
"mixtral-8x7b": {
"Offline": "result_tokens_per_second",
"Server": "result_completed_tokens_per_second",
}
}
}

LLAMA2_LATENCY_LIMITS = {
# We might add interactive in the next round. Latency in ns
"conversational": {
"ttft": 2000 * 1000000,
"tpot": 200 * 1000000
}
# Per-model latency limits (nanoseconds) for LLM Server-scenario checks.
# Both llama2-70b variants share one "conversational" constraint:
# TTFT (time to first token) <= 2 s, TPOT (time per output token) <= 200 ms.
LLM_LATENCY_LIMITS = {
    model: {
        "conversational": {
            "ttft": 2000 * 1000000,
            "tpot": 200 * 1000000,
        }
    }
    for model in ("llama2-70b-99", "llama2-70b-99.9")
    # NOTE(review): "mixtral-8x7b" is deliberately absent until its limits
    # are ratified (see the commented-out entry in the original diff).
}

ACC_PATTERN = {
Expand Down Expand Up @@ -799,13 +994,13 @@ def check_accuracy_dir(config, model, path, verbose):
return is_valid, result_acc


def extra_check_llama2(mlperf_log, scenario):
def extra_check_llm(mlperf_log, scenario, model):
if (mlperf_log["requested_use_token_latencies"]):
if scenario == "Offline":
# For offline no further checks are necessary
return None, True
else:
for constraint, limits in LLAMA2_LATENCY_LIMITS.items():
for constraint, limits in LLM_LATENCY_LIMITS[model].items():
if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits["ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]:
return constraint, True
else:
Expand Down Expand Up @@ -867,8 +1062,8 @@ def check_performance_dir(
res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])


if model in ["llama2-70b-99", "llama2-70b-99.9"]:
llama_constraint, is_valid = extra_check_llama2(mlperf_log, scenario_fixed)
if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]:
llama_constraint, is_valid = extra_check_llm(mlperf_log, scenario_fixed, model)

latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"]
latency_mean = mlperf_log["result_mean_latency_ns"]
Expand Down Expand Up @@ -2344,8 +2539,7 @@ def check_compliance_dir(
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl"

"mixtral-8x7b"
]:
test_list.remove("TEST04")

Expand All @@ -2355,13 +2549,23 @@ def check_compliance_dir(
"llama2-70b-99",
"llama2-70b-99.9",
"stable-diffusion-xl",
"mixtral-8x7b"
]:
test_list.remove("TEST05")

if model in [
"gptj-99",
"gptj-99.9",
"llama2-70b-99",
"llama2-70b-99.9",
"mixtral-8x7b"
]:
test_list.remove("TEST01")

if model in [
"llama2-70b-99",
"llama2-70b-99.9",
"mixtral-8x7b"
]:
test_list.append("TEST06")

Expand Down
Loading