Skip to content

File artifacts #194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cascade/models/basic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,13 @@ def load(cls, path: str, check_hash: bool = True) -> "BasicModel":
def save(self, path: str) -> None:
"""
Saves model to the path provided
Also copies any additional files in the model folder.

Path should be a folder, which will be created
if not exists and saves there as `model.pkl`
"""
if not os.path.isdir(path):
raise ValueError(f"Error when saving a model - {path} is not a folder")
super().save(path)

os.makedirs(path, exist_ok=True)
path = os.path.join(path, "model.pkl")

with open(path, "wb") as f:
Expand Down
70 changes: 68 additions & 2 deletions cascade/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
limitations under the License.
"""

import os
from shutil import copyfile
import warnings
from typing import Any, Union

Expand Down Expand Up @@ -43,6 +45,8 @@ def __init__(
self.metrics = {}
self.params = kwargs
self.created_at = pendulum.now(tz="UTC")
self._file_artifacts_paths = []
self._file_artifact_missing_oks = []
# Model accepts meta_prefix explicitly to not to record it in 'params'
super().__init__(*args, meta_prefix=meta_prefix, **kwargs)

Expand Down Expand Up @@ -75,9 +79,54 @@ def load(cls, path: str, *args: Any, **kwargs: Any) -> "Model":

def save(self, path: str, *args: Any, **kwargs: Any) -> None:
    """
    Does additional saving routines. Call this if you call
    save() in any subclass.

    Creates the model folder and copies every file artifact
    registered with add_file into its `files` subfolder.

    Parameters
    ----------
    path : str
        Path to the model folder, created if it does not exist

    Raises
    ------
    FileExistsError
        If the path exists and is not a folder (raised by os.makedirs)
    FileNotFoundError
        If the file that should be copied does not exist and
        it is not ok. See `add_file` for more info.

    See also
    --------
    cascade.models.Model.add_file
    """
    os.makedirs(path, exist_ok=True)

    # Both lists are created together in Model.__init__; if either is missing
    # the subclass most likely skipped super().__init__, so there is nothing
    # registered to copy - warn instead of failing with AttributeError.
    if not (
        hasattr(self, "_file_artifacts_paths")
        and hasattr(self, "_file_artifact_missing_oks")
    ):
        warnings.warn(
            "Failed to perform basic Model.save since some attributes are missing, "
            "maybe you haven't call super().__init__ in Model's subclass?"
        )
        return

    files_folder = os.path.join(path, "files")

    for filepath, but_its_ok in zip(
        self._file_artifacts_paths, self._file_artifact_missing_oks
    ):
        if not os.path.exists(filepath):
            if but_its_ok:
                # Registered with missing_ok=True - silently skip
                continue
            raise FileNotFoundError(
                f"File {filepath} not found when trying to copy an artifact of model {self.slug}"
            )

        # The folder is created lazily so that models without
        # any existing artifacts do not get an empty `files` folder
        os.makedirs(files_folder, exist_ok=True)

        # Artifacts are stored flat under their base name
        filename = os.path.basename(filepath)
        copyfile(filepath, os.path.join(files_folder, filename))

def load_artifact(self, path: str, *args: Any, **kwargs: Any) -> None:
"""
Expand Down Expand Up @@ -113,6 +162,23 @@ def get_meta(self) -> PipeMeta:

return meta

def add_file(self, path: str, missing_ok: bool = False) -> None:
    """
    Register an additional file artifact for the model.
    The path is only recorded here; nothing is read or copied
    by this call.

    Parameters
    ----------
    path : str
        Path to the file artifact. The file does not have to
        exist at the time of the call.
    missing_ok : bool, optional
        Flag stored alongside the path that tells whether a missing
        file is acceptable, by default False
    """
    # The two lists are kept parallel: entry i of one describes entry i of the other
    registered_paths = self._file_artifacts_paths
    registered_paths.append(path)

    tolerance_flags = self._file_artifact_missing_oks
    tolerance_flags.append(missing_ok)


class ModelModifier(Model):
"""
Expand Down
21 changes: 7 additions & 14 deletions cascade/models/model_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,21 +143,7 @@ def save(self, model: Model, only_meta: bool = False) -> None:
os.makedirs(model_folder)
break

model.save(os.path.join(self._root, folder_name))

meta = model.get_meta()
if not only_meta:
artifacts_folder = os.path.join(self._root, folder_name, "artifacts")
os.makedirs(artifacts_folder)
model.save_artifact(artifacts_folder)

# exact_filename = exact_filename[0]
# with open(exact_filename, "rb") as f:
# md5sum = md5(f.read()).hexdigest()

# meta[0]["name"] = exact_filename
# meta[0]["md5sum"] = md5sum

meta[0]["path"] = os.path.join(self._root, folder_name)
meta[0]["saved_at"] = pendulum.now(tz="UTC")
self.model_names.append(folder_name)
Expand All @@ -166,6 +152,13 @@ def save(self, model: Model, only_meta: bool = False) -> None:
os.path.join(self._root, folder_name, "meta" + self._meta_fmt), meta
)

model.save(os.path.join(self._root, folder_name))

if not only_meta:
artifacts_folder = os.path.join(self._root, folder_name, "artifacts")
os.makedirs(artifacts_folder)
model.save_artifact(artifacts_folder)

self._update_meta()

def __repr__(self) -> str:
Expand Down
57 changes: 57 additions & 0 deletions cascade/tests/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Copyright 2022-2023 Ilia Moiseev

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import os
import sys

import pytest

MODULE_PATH = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
sys.path.append(os.path.dirname(MODULE_PATH))

from cascade.models import Model


def test_add_file(tmp_path):
    """A file registered with add_file is copied into <model>/files on save."""
    root = str(tmp_path)
    expected_text = "Hello I am artifact"

    # Create the artifact that the model should pick up
    artifact = os.path.join(root, "file.txt")
    with open(artifact, "w") as out:
        out.write(expected_text)

    model_dir = os.path.join(root, "model")
    model = Model()
    model.add_file(artifact)
    model.save(model_dir)

    # The copy keeps the original file name inside the `files` subfolder
    copied = os.path.join(model_dir, "files", "file.txt")
    with open(copied, "r") as src:
        actual_text = src.read()

    assert actual_text == expected_text


def test_add_missing_file(tmp_path):
    """Saving fails on a missing artifact unless it was added with missing_ok=True."""
    model_dir = os.path.join(str(tmp_path), "model")

    # Default registration: a missing file must surface as FileNotFoundError
    strict_model = Model()
    strict_model.add_file("iammissing.jpg")
    with pytest.raises(FileNotFoundError):
        strict_model.save(model_dir)

    # missing_ok=True: saving must pass without raising
    lenient_model = Model()
    lenient_model.add_file("iammissingtoobutitsok.jpg", missing_ok=True)
    lenient_model.save(model_dir)
16 changes: 5 additions & 11 deletions cascade/utils/baselines/constant_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
limitations under the License.
"""

import json
from typing import Any, List

from ...base import PipeMeta
from ...models import BasicModel


Expand All @@ -41,13 +41,7 @@ def predict(self, x: Any, *args: Any, **kwargs: Any) -> List[Any]:
"""
return [self._constant for _ in range(len(x))]

def save(self, path: str) -> None:
with open(path, "w") as f:
json.dump({"constant": self._constant}, f)

@classmethod
def load(cls, path: str) -> "ConstantBaseline":
with open(path, "r") as f:
obj = json.load(f)
model = ConstantBaseline(obj["constant"])
return model
def get_meta(self) -> PipeMeta:
    """
    Returns the parent's meta with the stored constant
    written under the "constant" key of its first element.
    """
    baseline_meta = super().get_meta()
    head = baseline_meta[0]
    head["constant"] = self._constant
    return baseline_meta
10 changes: 1 addition & 9 deletions cascade/utils/sklearn/sk_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,7 @@ def save(self, path: str) -> None:
--------
cascade.utils.sklearn.SkModel.save_artifact
"""
if not os.path.isdir(path):
raise ValueError(f"Error when saving a model - {path} is not a folder")

os.makedirs(path, exist_ok=True)
super().save(path)
model_path = os.path.join(path, "model.pkl")

pipeline = self._pipeline
Expand All @@ -104,11 +101,6 @@ def save_artifact(self, path: str, *args: Any, **kwargs: Any) -> None:
----------
path : str
the folder in which to save pipeline.pkl

Raises
------
ValueError
if the path is not a valid directory
"""
if not os.path.isdir(path):
raise ValueError(f"Error when saving an artifact - {path} is not a folder")
Expand Down
6 changes: 2 additions & 4 deletions cascade/utils/torch/torch_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,10 @@ def save(self, path: str, *args: Any, **kwargs: Any) -> None:
--------
cascade.utils.torch.TorchModel.save_artifact
"""
if not os.path.isdir(path):
raise ValueError(f"Error when saving a model - {path} is not a folder")

os.makedirs(path, exist_ok=True)
super().save(path)
model_path = os.path.join(path, "model.pkl")

# Save without torch artifact
model = self._model
del self._model
with open(model_path, "wb") as f:
Expand Down