Separate model and inference script tar.gz not working. #449

Open
siddharthsahu opened this issue Nov 23, 2023 · 0 comments

In reference to: https://github.com/huggingface/notebooks/blob/main/sagemaker/17_custom_inference_script/sagemaker-notebook.ipynb

For the HF diffusers library we need to upload a custom inference script. The issue with bundling model and code together is that it takes a lot of time to create and push model.tar.gz every time the code changes.

When I package my model and code together (like below), things work fine.

huggingface_model = HuggingFaceModel(
   model_data="s3://abc/xyz/model.tar.gz",      # s3 path having both model and code
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.17",  # transformers version used
   pytorch_version="1.10",       # pytorch version used
   py_version='py38',            # python version used
)

On unpacking, the folder structure looks like this:

model.tar.gz
  |- stable-diffusion
  |- controlnet
  |- code

As mentioned before, this works fine.
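
For reference, a minimal sketch of how an archive with this layout can be built using Python's tarfile module (the three local directory names are assumed to match the tree above):

import tarfile

# Build model.tar.gz with model weights and code at the archive root.
# The three directories are assumed to exist in the current working dir.
with tarfile.open("model.tar.gz", "w:gz") as tar:
    for name in ("stable-diffusion", "controlnet", "code"):
        tar.add(name, arcname=name)  # arcname keeps each dir at the archive root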

Since packaging code and model together makes archiving and uploading very slow, I want to decouple model and code, and tried the following.

huggingface_model = HuggingFaceModel(
   model_data="s3://abc/xyz/model.tar.gz",      # path to your model
   source_dir="s3://abc/xyz/sourcedir.tar.gz",       # path to your script
   entry_point="inference.py",
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.17",  # transformers version used
   pytorch_version="1.10",       # pytorch version used
   py_version='py38',            # python version used
)

Here, unpacking model.tar.gz gives you stable-diffusion and controlnet, and unpacking sourcedir.tar.gz gives you inference.py and requirements.txt.
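
For context on where this fails: judging from the traceback below, passing source_dir/entry_point makes deploy() repack the model. sagemaker.utils.repack_model downloads model.tar.gz, extracts all of it into a local temp directory, copies the inference code into a code/ dir, re-tars everything, and uploads the result. A simplified sketch of that flow (not the SDK's actual implementation; the function name is hypothetical):

import os
import tarfile
import tempfile

def repack_model_sketch(local_model_tar, out_path):
    """Rough outline of sagemaker.utils.repack_model, per the traceback."""
    with tempfile.TemporaryDirectory() as tmp:        # scratch space on local disk
        model_dir = os.path.join(tmp, "model")
        with tarfile.open(local_model_tar, "r:gz") as t:
            t.extractall(path=model_dir)              # entire model is unpacked here
        # ... inference.py / requirements.txt are copied into model_dir/code ...
        with tarfile.open(out_path, "w:gz") as t:     # then everything is re-tarred
            t.add(model_dir, arcname=".")

Note that the extraction and re-tar both happen on the local volume, so the separate-archive path needs extra scratch space that the bundled path does not.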

This gives the following error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[13], line 16
      5 huggingface_model = HuggingFaceModel(
      6    model_data=s3_model_uri,      # path to your model
      7    source_dir=s3_code_uri,       # parth to you script
   (...)
     12    py_version='py38',            # python version used
     13 )
     15 # deploy the endpoint endpoint
---> 16 predictor = huggingface_model.deploy(
     17     initial_instance_count=1,
     18     instance_type="ml.g4dn.xlarge"
     19     )

File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:313, in HuggingFaceModel.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
    306     inference_tool = "neuron" if instance_type.startswith("ml.inf1") else "neuronx"
    307     self.image_uri = self.serving_image_uri(
    308         region_name=self.sagemaker_session.boto_session.region_name,
    309         instance_type=instance_type,
    310         inference_tool=inference_tool,
    311     )
--> 313 return super(HuggingFaceModel, self).deploy(
    314     initial_instance_count,
    315     instance_type,
    316     serializer,
    317     deserializer,
    318     accelerator_type,
    319     endpoint_name,
    320     tags,
    321     kms_key,
    322     wait,
    323     data_capture_config,
    324     async_inference_config,
    325     serverless_inference_config,
    326     volume_size=volume_size,
    327     model_data_download_timeout=model_data_download_timeout,
    328     container_startup_health_check_timeout=container_startup_health_check_timeout,
    329     inference_recommendation_id=inference_recommendation_id,
    330     explainer_config=explainer_config,
    331 )

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:1406, in Model.deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, tags, kms_key, wait, data_capture_config, async_inference_config, serverless_inference_config, volume_size, model_data_download_timeout, container_startup_health_check_timeout, inference_recommendation_id, explainer_config, **kwargs)
   1403     if self._base_name is not None:
   1404         self._base_name = "-".join((self._base_name, compiled_model_suffix))
-> 1406 self._create_sagemaker_model(
   1407     instance_type=instance_type,
   1408     accelerator_type=accelerator_type,
   1409     tags=tags,
   1410     serverless_inference_config=serverless_inference_config,
   1411 )
   1413 serverless_inference_config_dict = (
   1414     serverless_inference_config._to_request_dict() if is_serverless else None
   1415 )
   1416 production_variant = sagemaker.production_variant(
   1417     self.name,
   1418     instance_type,
   (...)
   1424     container_startup_health_check_timeout=container_startup_health_check_timeout,
   1425 )

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:794, in Model._create_sagemaker_model(self, instance_type, accelerator_type, tags, serverless_inference_config)
    768 def _create_sagemaker_model(
    769     self,
    770     instance_type=None,
   (...)
    773     serverless_inference_config=None,
    774 ):
    775     """Create a SageMaker Model Entity
    776 
    777     Args:
   (...)
    792             not provided in serverless inference. So this is used to find image URIs.
    793     """
--> 794     container_def = self.prepare_container_def(
    795         instance_type,
    796         accelerator_type=accelerator_type,
    797         serverless_inference_config=serverless_inference_config,
    798     )
    800     if not isinstance(self.sagemaker_session, PipelineSession):
    801         # _base_name, model_name are not needed under PipelineSession.
    802         # the model_data may be Pipeline variable
    803         # which may break the _base_name generation
    804         model_uri = None

File /opt/conda/lib/python3.10/site-packages/sagemaker/huggingface/model.py:498, in HuggingFaceModel.prepare_container_def(self, instance_type, accelerator_type, serverless_inference_config, inference_tool)
    489     deploy_image = self.serving_image_uri(
    490         region_name,
    491         instance_type,
   (...)
    494         inference_tool=inference_tool,
    495     )
    497 deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
--> 498 self._upload_code(deploy_key_prefix, repack=True)
    499 deploy_env = dict(self.env)
    500 deploy_env.update(self._script_mode_env_vars())

File /opt/conda/lib/python3.10/site-packages/sagemaker/model.py:723, in Model._upload_code(self, key_prefix, repack)
    707     self.uploaded_code = fw_utils.UploadedCode(
    708         s3_prefix=repacked_model_data,
    709         script_name=os.path.basename(self.entry_point),
    710     )
    712 LOGGER.info(
    713     "Repacking model artifact (%s), script artifact "
    714     "(%s), and dependencies (%s) "
   (...)
    720     repacked_model_data,
    721 )
--> 723 utils.repack_model(
    724     inference_script=self.entry_point,
    725     source_directory=self.source_dir,
    726     dependencies=self.dependencies,
    727     model_uri=self.model_data,
    728     repacked_model_uri=repacked_model_data,
    729     sagemaker_session=self.sagemaker_session,
    730     kms_key=self.model_kms_key,
    731 )
    733 self.repacked_model_data = repacked_model_data

File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:517, in repack_model(inference_script, source_directory, dependencies, model_uri, repacked_model_uri, sagemaker_session, kms_key)
    510 local_download_dir = (
    511     None
    512     if sagemaker_session.settings is None
    513     or sagemaker_session.settings.local_download_dir is None
    514     else sagemaker_session.settings.local_download_dir
    515 )
    516 with _tmpdir(directory=local_download_dir) as tmp:
--> 517     model_dir = _extract_model(model_uri, sagemaker_session, tmp)
    519     _create_or_update_code_dir(
    520         model_dir,
    521         inference_script,
   (...)
    525         tmp,
    526     )
    528     tmp_model_path = os.path.join(tmp, "temp-model.tar.gz")

File /opt/conda/lib/python3.10/site-packages/sagemaker/utils.py:607, in _extract_model(model_uri, sagemaker_session, tmp)
    605     local_model_path = model_uri.replace("file://", "")
    606 with tarfile.open(name=local_model_path, mode="r:gz") as t:
--> 607     t.extractall(path=tmp_model_dir)
    608 return tmp_model_dir

File /opt/conda/lib/python3.10/tarfile.py:2059, in TarFile.extractall(self, path, members, numeric_owner)
   2057         tarinfo.mode = 0o700
   2058     # Do not set_attrs directories, as we will do that further down
-> 2059     self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
   2060                  numeric_owner=numeric_owner)
   2062 # Reverse sort directories.
   2063 directories.sort(key=lambda a: a.name)

File /opt/conda/lib/python3.10/tarfile.py:2100, in TarFile.extract(self, member, path, set_attrs, numeric_owner)
   2097     tarinfo._link_target = os.path.join(path, tarinfo.linkname)
   2099 try:
-> 2100     self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
   2101                          set_attrs=set_attrs,
   2102                          numeric_owner=numeric_owner)
   2103 except OSError as e:
   2104     if self.errorlevel > 0:

File /opt/conda/lib/python3.10/tarfile.py:2173, in TarFile._extract_member(self, tarinfo, targetpath, set_attrs, numeric_owner)
   2170     self._dbg(1, tarinfo.name)
   2172 if tarinfo.isreg():
-> 2173     self.makefile(tarinfo, targetpath)
   2174 elif tarinfo.isdir():
   2175     self.makedir(tarinfo, targetpath)

File /opt/conda/lib/python3.10/tarfile.py:2222, in TarFile.makefile(self, tarinfo, targetpath)
   2220     target.truncate()
   2221 else:
-> 2222     copyfileobj(source, target, tarinfo.size, ReadError, bufsize)

File /opt/conda/lib/python3.10/tarfile.py:251, in copyfileobj(src, dst, length, exception, bufsize)
    249     if len(buf) < bufsize:
    250         raise exception("unexpected end of data")
--> 251     dst.write(buf)
    253 if remainder != 0:
    254     buf = src.read(remainder)

OSError: [Errno 28] No space left on device

I have confirmed that this is not a disk-space issue by trying this on a freshly set-up SageMaker domain and on bigger machines.
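
One thing that may still be worth trying: in the repack_model frame above, the scratch directory comes from sagemaker_session.settings.local_download_dir. Pointing that at a volume with more free space is a possible mitigation. A sketch, untested; the path below is only an example and must already exist on the notebook instance:

from sagemaker import Session
from sagemaker.session_settings import SessionSettings
from sagemaker.huggingface import HuggingFaceModel

# Redirect the repack scratch space away from the default temp dir.
session = Session(settings=SessionSettings(local_download_dir="/home/ec2-user/SageMaker/tmp"))

huggingface_model = HuggingFaceModel(
    model_data="s3://abc/xyz/model.tar.gz",
    source_dir="s3://abc/xyz/sourcedir.tar.gz",
    entry_point="inference.py",
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version="py38",
    sagemaker_session=session,  # repack_model reads this session's settings
)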
