Skip to content
This repository was archived by the owner on Jun 3, 2025. It is now read-only.

Commit 05e55e5

Browse files
George and Danny Guinther authored
Request V2 files from graphql (#361)
* Request V2 files from graphql
* Change underscores to dashes in archive names
* download to appropriate destination
* better error handling
* test edits
* comments
* undo comment

---------

Co-authored-by: Danny Guinther <dguinther@neuralmagic.com>
1 parent 8035607 commit 05e55e5

File tree

9 files changed: 84 additions (+84) and 102 deletions (-102).

src/sparsezoo/api/query_parser.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ def _parse_fields(self) -> None:
121121
def parse_list_fields_to_string(self, fields: List[str]) -> str:
122122
parsed_fields = ""
123123
for field in fields:
124-
camel_case_field = to_camel_case(field)
125-
parsed_fields += f"{camel_case_field} "
124+
field_without_arguments, sep, args = field.partition("(")
125+
camel_case_field = to_camel_case(field_without_arguments)
126+
args_str = f"{sep}{args}" if args else ""
127+
parsed_fields += f"{camel_case_field}{args_str} "
126128
if camel_case_field in DEFAULT_FIELDS:
127129
stringified_fields = self.parse_list_fields_to_string(
128130
DEFAULT_FIELDS.get(camel_case_field)

src/sparsezoo/model/model.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,20 +110,20 @@ def __init__(self, source: str, download_path: Optional[str] = None):
110110
self.sample_originals: Directory = self._directory_from_files(
111111
files,
112112
directory_class=Directory,
113-
display_name="sample_originals",
113+
display_name="sample-originals",
114114
)
115115
self.sample_inputs: NumpyDirectory = self._directory_from_files(
116116
files,
117117
directory_class=NumpyDirectory,
118-
display_name="sample_inputs",
118+
display_name="sample-inputs",
119119
)
120120

121121
self.model_card: File = self._file_from_files(files, display_name="model.md")
122122

123123
self.sample_outputs = self._directory_from_files(
124124
files,
125125
directory_class=NumpyDirectory,
126-
display_name="sample_outputs",
126+
display_name="sample-outputs",
127127
allow_multiple_outputs=True,
128128
regex=True,
129129
)
@@ -133,7 +133,7 @@ def __init__(self, source: str, download_path: Optional[str] = None):
133133
] = self._sample_outputs_list_to_dict(self.sample_outputs)
134134

135135
self.sample_labels: Directory = self._directory_from_files(
136-
files, directory_class=Directory, display_name="sample_labels"
136+
files, directory_class=Directory, display_name="sample-labels"
137137
)
138138

139139
self.deployment: SelectDirectory = self._directory_from_files(
@@ -150,12 +150,9 @@ def __init__(self, source: str, download_path: Optional[str] = None):
150150

151151
self.logs: Directory = self._directory_from_files(files, display_name="logs")
152152

153-
self.recipes: SelectDirectory = self._directory_from_files(
154-
files,
155-
directory_class=SelectDirectory,
156-
display_name="recipe",
157-
stub_params=self.stub_params,
158-
)
153+
self.recipes = self._file_from_files(files, display_name="^recipe", regex=True)
154+
if isinstance(self.recipes, File):
155+
self.recipes = [self.recipes]
159156

160157
self._onnx_gz: OnnxGz = self._directory_from_files(
161158
files, directory_class=OnnxGz, display_name="model.onnx.tar.gz"
@@ -691,7 +688,7 @@ def _sample_outputs_list_to_dict(
691688
if not isinstance(directories, list):
692689
# if found a single 'sample_outputs' directory,
693690
# assume it should be mapped to its the native framework
694-
expected_name = "sample_outputs"
691+
expected_name = "sample-outputs"
695692
if directories.name not in [expected_name, expected_name + ".tar.gz"]:
696693
raise ValueError(
697694
"Found single folder (or tar.gz archive)"
@@ -701,7 +698,7 @@ def _sample_outputs_list_to_dict(
701698
engine_to_numpydir_map["framework"] = directories
702699

703700
else:
704-
# if found multiple 'sample_outputs' directories,
701+
# if found multiple 'sample-outputs' directories,
705702
# use directory name to relate it with the appropriate
706703
# inference engine
707704
for directory in directories:
@@ -710,7 +707,7 @@ def _sample_outputs_list_to_dict(
710707
engine_name = engine_name.replace(".tar.gz", "")
711708
if engine_name not in ENGINES:
712709
raise ValueError(
713-
f"The name of the 'sample_outputs' directory should "
710+
f"The name of the 'sample-outputs' directory should "
714711
f"end with an engine name (one of the {ENGINES}). "
715712
f"However, the name is {directory.name}."
716713
)

src/sparsezoo/model/utils.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def load_files_from_stub(
142142
fields=[
143143
"model_id",
144144
"model_onnx_size_compressed_bytes",
145-
"files",
145+
"files(version: 2)",
146146
"benchmark_results",
147147
"training_results",
148148
"repo_name",
@@ -168,9 +168,11 @@ def load_files_from_stub(
168168
model_id = model["model_id"]
169169

170170
files = model.get("files")
171+
if len(files) == 0:
172+
raise ValueError(f"No files found for stub {stub}")
173+
171174
include_file_download_url(files)
172175
files = restructure_request_json(request_json=files)
173-
174176
if params is not None:
175177
files = filter_files(files=files, params=params)
176178

@@ -308,7 +310,7 @@ def save_outputs_to_tar(
308310

309311
path = os.path.join(
310312
os.path.dirname(sample_inputs.path),
311-
f"sample_outputs_{engine_type}",
313+
f"sample-outputs_{engine_type}",
312314
)
313315
if not os.path.exists(path):
314316
os.mkdir(path)
@@ -382,26 +384,14 @@ def restructure_request_json(
382384
file_dict_deployment["file_type"] = "deployment"
383385
request_json.append(file_dict_deployment)
384386

385-
# create recipes
386-
recipe_dicts_list = fetch_from_request_json(request_json, "file_type", "recipe")
387-
for (idx, file_dict) in recipe_dicts_list:
388-
display_name = file_dict["display_name"]
389-
# make sure that recipe name has a
390-
# format `recipe_{...}`.
391-
prefix = "recipe_"
392-
if not display_name.startswith(prefix):
393-
display_name = prefix + display_name
394-
file_dict["display_name"] = display_name
395-
request_json[idx] = file_dict
396-
397387
# restructure inputs/labels/originals/outputs directories
398388
# use `sample-inputs.tar.gz` to simulate non-existent directories
399389

400390
files_to_create = [
401-
"sample_inputs.tar.gz",
402-
"sample_labels.tar.gz",
403-
"sample_originals.tar.gz",
404-
"sample_outputs.tar.gz",
391+
"sample-inputs.tar.gz",
392+
"sample-labels.tar.gz",
393+
"sample-originals.tar.gz",
394+
"sample-outputs.tar.gz",
405395
]
406396
types = ["inputs", "labels", "originals", "outputs"]
407397
for file_name, type in zip(files_to_create, types):

src/sparsezoo/objects/directory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def download(
194194
else:
195195
for file in self.files:
196196
file.download(
197-
destination_path=os.path.join(destination_path, self.name)
197+
destination_path=destination_path,
198198
)
199199
file._path = os.path.join(destination_path, self.name, file.name)
200200

tests/sparsezoo/analyze/bert_pruned80_quant-none-vnni.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/sparsezoo/analyze/bert_pruned_quantized.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/sparsezoo/analyze/helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@
5050
"bert_pruned_quantized": {
5151
"stub": (
5252
"zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/"
53-
"12layer_pruned80_quant-none-vnni"
53+
"pruned80_quant-none-vnni"
5454
),
55-
"truth": f"{os.path.dirname(__file__)}/bert_pruned_quantized.json",
55+
"truth": f"{os.path.dirname(__file__)}/bert_pruned80_quant-none-vnni.json",
5656
},
5757
"resnet50_pruned_quantized": {
5858
"stub": (

tests/sparsezoo/model/test_model.py

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,14 @@
3232
"onnx",
3333
"model.onnx",
3434
"model.onnx.tar.gz",
35-
"recipe",
36-
"sample_inputs.tar.gz",
37-
"sample_originals.tar.gz",
38-
"sample_labels.tar.gz",
39-
"sample_outputs.tar.gz",
40-
"sample_inputs",
41-
"sample_originals",
42-
"sample_labels",
43-
"sample_outputs",
35+
"sample-inputs.tar.gz",
36+
"sample-originals.tar.gz",
37+
"sample-labels.tar.gz",
38+
"sample-outputs.tar.gz",
39+
"sample-inputs",
40+
"sample-originals",
41+
"sample-labels",
42+
"sample-outputs",
4443
"benchmarks.yaml",
4544
"eval.yaml",
4645
"analysis.yaml",
@@ -116,9 +115,7 @@ def test_model_from_stub(self, setup):
116115

117116
@staticmethod
118117
def _assert_correct_files_downloaded(model, args):
119-
if args[0] == "recipe":
120-
assert len(model.recipes.available) == 1
121-
elif args[0] == "checkpoint":
118+
if args[0] == "checkpoint":
122119
assert len(model.training.available) == 1
123120
elif args[0] == "deployment":
124121
assert len(model.training.available) == 1
@@ -141,7 +138,7 @@ def _assert_validation_results_exist(model):
141138
"pytorch/sparseml/imagenet/pruned-moderate"
142139
),
143140
True,
144-
files_ic,
141+
files_ic.union({"recipe.md", "recipe_transfer_learn.md"}),
145142
),
146143
(
147144
(
@@ -150,7 +147,7 @@ def _assert_validation_results_exist(model):
150147
"pytorch/huggingface/squad/pruned80_quant-none-vnni"
151148
),
152149
False,
153-
files_nlp,
150+
files_nlp.union({"recipe.md"}),
154151
),
155152
(
156153
(
@@ -159,22 +156,22 @@ def _assert_validation_results_exist(model):
159156
"pytorch/ultralytics/coco/pruned_quant-aggressive_94"
160157
),
161158
True,
162-
files_yolo,
159+
files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
163160
),
164161
(
165162
"yolov5-x-coco-pruned70.4block_quantized",
166163
False,
167-
files_yolo,
164+
files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
168165
),
169166
(
170167
"yolov5-n6-voc_coco-pruned55",
171168
False,
172-
files_yolo,
169+
files_yolo.union({"recipe.md"}),
173170
),
174171
(
175172
"resnet_v1-50-imagenet-channel30_pruned90_quantized",
176173
False,
177-
files_yolo,
174+
files_yolo.union({"recipe.md", "recipe_transfer_classification.md"}),
178175
),
179176
],
180177
scope="function",
@@ -196,11 +193,10 @@ def test_folder_structure(self, setup):
196193
_, clone_sample_outputs, expected_files, temp_dir = setup
197194
if clone_sample_outputs:
198195
for file_name in [
199-
"sample_outputs_onnxruntime",
200-
"sample_outputs_deepsparse",
196+
"sample-outputs_onnxruntime",
197+
"sample-outputs_deepsparse",
201198
]:
202199
expected_files.update({file_name, file_name + ".tar.gz"})
203-
204200
assert not set(os.listdir(temp_dir.name)).difference(expected_files)
205201

206202
def test_validate(self, setup):
@@ -246,19 +242,19 @@ def _add_mock_files(directory_path: str, clone_sample_outputs: bool):
246242
)
247243
Path(optional_recipe_yaml).touch()
248244

249-
# add remaining `sample_{...}` files, that may be potentially
245+
# add remaining `sample-{...}` files, that may be potentially
250246
# missing
251-
mock_sample_file = os.path.join(directory_path, "sample_inputs.tar.gz")
252-
for file_name in ["sample_originals.tar.gz", "sample_labels.tar.gz"]:
247+
mock_sample_file = os.path.join(directory_path, "sample-inputs.tar.gz")
248+
for file_name in ["sample-originals.tar.gz", "sample-labels.tar.gz"]:
253249
expected_file_dir = os.path.join(directory_path, file_name)
254250
if not os.path.isfile(expected_file_dir):
255251
shutil.copyfile(mock_sample_file, expected_file_dir)
256252

257253
if clone_sample_outputs:
258-
sample_outputs_file = os.path.join(directory_path, "sample_outputs.tar.gz")
254+
sample_outputs_file = os.path.join(directory_path, "sample-outputs.tar.gz")
259255
for file_name in [
260-
"sample_outputs_onnxruntime.tar.gz",
261-
"sample_outputs_deepsparse.tar.gz",
256+
"sample-outputs_onnxruntime.tar.gz",
257+
"sample-outputs_deepsparse.tar.gz",
262258
]:
263259
shutil.copyfile(
264260
sample_outputs_file, os.path.join(directory_path, file_name)
@@ -271,12 +267,11 @@ def _test_generate_outputs_single_engine(self, engine, model_directory):
271267
if engine == "onnxruntime":
272268
# test whether the functionality saves the numpy files to tar properly
273269
tar_file_expected_path = os.path.join(
274-
directory_path, f"sample_outputs_{engine}.tar.gz"
270+
directory_path, f"sample-outputs_{engine}.tar.gz"
275271
)
276272
if os.path.isfile(tar_file_expected_path):
277273
os.remove(tar_file_expected_path)
278274
save_to_tar = True
279-
280275
output_expected = next(iter(model_directory.sample_outputs[engine]))
281276
output_expected = list(output_expected.values())
282277
output = next(

0 commit comments

Comments
 (0)