Request V2 files from graphql (#361)

George · Danny Guinther · web-flow · commit 05e55e55bc6c · 2023-09-07T16:36:46.000-04:00
* Request V2 files from graphql

* Change underscores to dashes in archive names

* download to appropriate destination

* better error handling

* test edits

* comments

* undo comment

---------

Co-authored-by: Danny Guinther <dguinther@neuralmagic.com>
diff --git a/src/sparsezoo/api/query_parser.py b/src/sparsezoo/api/query_parser.py
@@ -121,8 +121,10 @@ def _parse_fields(self) -> None:
     def parse_list_fields_to_string(self, fields: List[str]) -> str:
         parsed_fields = ""
         for field in fields:
-            camel_case_field = to_camel_case(field)
-            parsed_fields += f"{camel_case_field} "
+            field_without_arguments, sep, args = field.partition("(")
+            camel_case_field = to_camel_case(field_without_arguments)
+            args_str = f"{sep}{args}" if args else ""
+            parsed_fields += f"{camel_case_field}{args_str} "
             if camel_case_field in DEFAULT_FIELDS:
                 stringified_fields = self.parse_list_fields_to_string(
                     DEFAULT_FIELDS.get(camel_case_field)
diff --git a/src/sparsezoo/model/model.py b/src/sparsezoo/model/model.py
@@ -110,20 +110,20 @@ def __init__(self, source: str, download_path: Optional[str] = None):
         self.sample_originals: Directory = self._directory_from_files(
             files,
             directory_class=Directory,
-            display_name="sample_originals",
+            display_name="sample-originals",
         )
         self.sample_inputs: NumpyDirectory = self._directory_from_files(
             files,
             directory_class=NumpyDirectory,
-            display_name="sample_inputs",
+            display_name="sample-inputs",
         )
 
         self.model_card: File = self._file_from_files(files, display_name="model.md")
 
         self.sample_outputs = self._directory_from_files(
             files,
             directory_class=NumpyDirectory,
-            display_name="sample_outputs",
+            display_name="sample-outputs",
             allow_multiple_outputs=True,
             regex=True,
         )
@@ -133,7 +133,7 @@ def __init__(self, source: str, download_path: Optional[str] = None):
             ] = self._sample_outputs_list_to_dict(self.sample_outputs)
 
         self.sample_labels: Directory = self._directory_from_files(
-            files, directory_class=Directory, display_name="sample_labels"
+            files, directory_class=Directory, display_name="sample-labels"
         )
 
         self.deployment: SelectDirectory = self._directory_from_files(
@@ -150,12 +150,9 @@ def __init__(self, source: str, download_path: Optional[str] = None):
 
         self.logs: Directory = self._directory_from_files(files, display_name="logs")
 
-        self.recipes: SelectDirectory = self._directory_from_files(
-            files,
-            directory_class=SelectDirectory,
-            display_name="recipe",
-            stub_params=self.stub_params,
-        )
+        self.recipes = self._file_from_files(files, display_name="^recipe", regex=True)
+        if isinstance(self.recipes, File):
+            self.recipes = [self.recipes]
 
         self._onnx_gz: OnnxGz = self._directory_from_files(
             files, directory_class=OnnxGz, display_name="model.onnx.tar.gz"
@@ -691,7 +688,7 @@ def _sample_outputs_list_to_dict(
         if not isinstance(directories, list):
             # if found a single 'sample_outputs' directory,
             # assume it should be mapped to its the native framework
-            expected_name = "sample_outputs"
+            expected_name = "sample-outputs"
             if directories.name not in [expected_name, expected_name + ".tar.gz"]:
                 raise ValueError(
                     "Found single folder (or tar.gz archive)"
@@ -701,7 +698,7 @@ def _sample_outputs_list_to_dict(
             engine_to_numpydir_map["framework"] = directories
 
         else:
-            # if found multiple 'sample_outputs' directories,
+            # if found multiple 'sample-outputs' directories,
             # use directory name to relate it with the appropriate
             # inference engine
             for directory in directories:
@@ -710,7 +707,7 @@ def _sample_outputs_list_to_dict(
                     engine_name = engine_name.replace(".tar.gz", "")
                 if engine_name not in ENGINES:
                     raise ValueError(
-                        f"The name of the 'sample_outputs' directory should "
+                        f"The name of the 'sample-outputs' directory should "
                         f"end with an engine name (one of the {ENGINES}). "
                         f"However, the name is {directory.name}."
                     )
diff --git a/src/sparsezoo/model/utils.py b/src/sparsezoo/model/utils.py
@@ -142,7 +142,7 @@ def load_files_from_stub(
         fields=[
             "model_id",
             "model_onnx_size_compressed_bytes",
-            "files",
+            "files(version: 2)",
             "benchmark_results",
             "training_results",
             "repo_name",
@@ -168,9 +168,11 @@ def load_files_from_stub(
         model_id = model["model_id"]
 
         files = model.get("files")
+        if len(files) == 0:
+            raise ValueError(f"No files found for stub {stub}")
+
         include_file_download_url(files)
         files = restructure_request_json(request_json=files)
-
         if params is not None:
             files = filter_files(files=files, params=params)
 
@@ -308,7 +310,7 @@ def save_outputs_to_tar(
 
     path = os.path.join(
         os.path.dirname(sample_inputs.path),
-        f"sample_outputs_{engine_type}",
+        f"sample-outputs_{engine_type}",
     )
     if not os.path.exists(path):
         os.mkdir(path)
@@ -382,26 +384,14 @@ def restructure_request_json(
             file_dict_deployment["file_type"] = "deployment"
             request_json.append(file_dict_deployment)
 
-    # create recipes
-    recipe_dicts_list = fetch_from_request_json(request_json, "file_type", "recipe")
-    for (idx, file_dict) in recipe_dicts_list:
-        display_name = file_dict["display_name"]
-        # make sure that recipe name has a
-        # format `recipe_{...}`.
-        prefix = "recipe_"
-        if not display_name.startswith(prefix):
-            display_name = prefix + display_name
-            file_dict["display_name"] = display_name
-            request_json[idx] = file_dict
-
     # restructure inputs/labels/originals/outputs directories
     # use `sample-inputs.tar.gz` to simulate non-existent directories
 
     files_to_create = [
-        "sample_inputs.tar.gz",
-        "sample_labels.tar.gz",
-        "sample_originals.tar.gz",
-        "sample_outputs.tar.gz",
+        "sample-inputs.tar.gz",
+        "sample-labels.tar.gz",
+        "sample-originals.tar.gz",
+        "sample-outputs.tar.gz",
     ]
     types = ["inputs", "labels", "originals", "outputs"]
     for file_name, type in zip(files_to_create, types):
diff --git a/src/sparsezoo/objects/directory.py b/src/sparsezoo/objects/directory.py
@@ -194,7 +194,7 @@ def download(
         else:
             for file in self.files:
                 file.download(
-                    destination_path=os.path.join(destination_path, self.name)
+                    destination_path=destination_path,
                 )
                 file._path = os.path.join(destination_path, self.name, file.name)
 
diff --git a/tests/sparsezoo/analyze/bert_pruned80_quant-none-vnni.json b/tests/sparsezoo/analyze/bert_pruned80_quant-none-vnni.json
diff --git a/tests/sparsezoo/analyze/bert_pruned_quantized.json b/tests/sparsezoo/analyze/bert_pruned_quantized.json
diff --git a/tests/sparsezoo/analyze/helpers.py b/tests/sparsezoo/analyze/helpers.py
@@ -50,9 +50,9 @@
     "bert_pruned_quantized": {
         "stub": (
             "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/"
-            "12layer_pruned80_quant-none-vnni"
+            "pruned80_quant-none-vnni"
         ),
-        "truth": f"{os.path.dirname(__file__)}/bert_pruned_quantized.json",
+        "truth": f"{os.path.dirname(__file__)}/bert_pruned80_quant-none-vnni.json",
     },
     "resnet50_pruned_quantized": {
         "stub": (
diff --git a/tests/sparsezoo/model/test_model.py b/tests/sparsezoo/model/test_model.py
@@ -32,15 +32,14 @@
     "onnx",
     "model.onnx",
     "model.onnx.tar.gz",
-    "recipe",
-    "sample_inputs.tar.gz",
-    "sample_originals.tar.gz",
-    "sample_labels.tar.gz",
-    "sample_outputs.tar.gz",
-    "sample_inputs",
-    "sample_originals",
-    "sample_labels",
-    "sample_outputs",
+    "sample-inputs.tar.gz",
+    "sample-originals.tar.gz",
+    "sample-labels.tar.gz",
+    "sample-outputs.tar.gz",
+    "sample-inputs",
+    "sample-originals",
+    "sample-labels",
+    "sample-outputs",
     "benchmarks.yaml",
     "eval.yaml",
     "analysis.yaml",
@@ -116,9 +115,7 @@ def test_model_from_stub(self, setup):
 
     @staticmethod
     def _assert_correct_files_downloaded(model, args):
-        if args[0] == "recipe":
-            assert len(model.recipes.available) == 1
-        elif args[0] == "checkpoint":
+        if args[0] == "checkpoint":
             assert len(model.training.available) == 1
         elif args[0] == "deployment":
             assert len(model.training.available) == 1
@@ -141,7 +138,7 @@ def _assert_validation_results_exist(model):
                 "pytorch/sparseml/imagenet/pruned-moderate"
             ),
             True,
-            files_ic,
+            files_ic.union({"recipe.md", "recipe_transfer_learn.md"}),
         ),
         (
             (
@@ -150,7 +147,7 @@ def _assert_validation_results_exist(model):
                 "pytorch/huggingface/squad/pruned80_quant-none-vnni"
             ),
             False,
-            files_nlp,
+            files_nlp.union({"recipe.md"}),
         ),
         (
             (
@@ -159,22 +156,22 @@ def _assert_validation_results_exist(model):
                 "pytorch/ultralytics/coco/pruned_quant-aggressive_94"
             ),
             True,
-            files_yolo,
+            files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
         ),
         (
             "yolov5-x-coco-pruned70.4block_quantized",
             False,
-            files_yolo,
+            files_yolo.union({"recipe.md", "recipe_transfer_learn.md"}),
         ),
         (
             "yolov5-n6-voc_coco-pruned55",
             False,
-            files_yolo,
+            files_yolo.union({"recipe.md"}),
         ),
         (
             "resnet_v1-50-imagenet-channel30_pruned90_quantized",
             False,
-            files_yolo,
+            files_yolo.union({"recipe.md", "recipe_transfer_classification.md"}),
         ),
     ],
     scope="function",
@@ -196,11 +193,10 @@ def test_folder_structure(self, setup):
         _, clone_sample_outputs, expected_files, temp_dir = setup
         if clone_sample_outputs:
             for file_name in [
-                "sample_outputs_onnxruntime",
-                "sample_outputs_deepsparse",
+                "sample-outputs_onnxruntime",
+                "sample-outputs_deepsparse",
             ]:
                 expected_files.update({file_name, file_name + ".tar.gz"})
-
         assert not set(os.listdir(temp_dir.name)).difference(expected_files)
 
     def test_validate(self, setup):
@@ -246,19 +242,19 @@ def _add_mock_files(directory_path: str, clone_sample_outputs: bool):
             )
             Path(optional_recipe_yaml).touch()
 
-        # add remaining `sample_{...}` files, that may be potentially
+        # add remaining `sample-{...}` files, that may be potentially
         # missing
-        mock_sample_file = os.path.join(directory_path, "sample_inputs.tar.gz")
-        for file_name in ["sample_originals.tar.gz", "sample_labels.tar.gz"]:
+        mock_sample_file = os.path.join(directory_path, "sample-inputs.tar.gz")
+        for file_name in ["sample-originals.tar.gz", "sample-labels.tar.gz"]:
             expected_file_dir = os.path.join(directory_path, file_name)
             if not os.path.isfile(expected_file_dir):
                 shutil.copyfile(mock_sample_file, expected_file_dir)
 
         if clone_sample_outputs:
-            sample_outputs_file = os.path.join(directory_path, "sample_outputs.tar.gz")
+            sample_outputs_file = os.path.join(directory_path, "sample-outputs.tar.gz")
             for file_name in [
-                "sample_outputs_onnxruntime.tar.gz",
-                "sample_outputs_deepsparse.tar.gz",
+                "sample-outputs_onnxruntime.tar.gz",
+                "sample-outputs_deepsparse.tar.gz",
             ]:
                 shutil.copyfile(
                     sample_outputs_file, os.path.join(directory_path, file_name)
@@ -271,12 +267,11 @@ def _test_generate_outputs_single_engine(self, engine, model_directory):
         if engine == "onnxruntime":
             # test whether the functionality saves the numpy files to tar properly
             tar_file_expected_path = os.path.join(
-                directory_path, f"sample_outputs_{engine}.tar.gz"
+                directory_path, f"sample-outputs_{engine}.tar.gz"
             )
             if os.path.isfile(tar_file_expected_path):
                 os.remove(tar_file_expected_path)
             save_to_tar = True
-
         output_expected = next(iter(model_directory.sample_outputs[engine]))
         output_expected = list(output_expected.values())
         output = next(
diff --git a/tests/sparsezoo/model/test_utils.py b/tests/sparsezoo/model/test_utils.py

Original file line number	Diff line number	Diff line change
`@@ -194,7 +194,7 @@ def download(`
`194`	`194`	`else:`
`195`	`195`	`for file in self.files:`
`196`	`196`	`file.download(`
`197`		`- destination_path=os.path.join(destination_path, self.name)`
	`197`	`+ destination_path=destination_path,`
`198`	`198`	`)`
`199`	`199`	`file._path = os.path.join(destination_path, self.name, file.name)`
`200`	`200`