-
Notifications
You must be signed in to change notification settings - Fork 29
add offload to disk #124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
add offload to disk #124
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,6 +41,7 @@ def __init__( | |
| self.offload_mode = None | ||
| self.model_names = [] | ||
| self._offload_param_dict = {} | ||
| self.offload_to_disk = False | ||
|
|
||
| @classmethod | ||
| def from_pretrained(cls, model_path_or_config: str | BaseConfig) -> "BasePipeline": | ||
|
|
@@ -228,19 +229,23 @@ def eval(self): | |
| model.eval() | ||
| return self | ||
|
|
||
| def enable_cpu_offload(self, offload_mode: str): | ||
| valid_offload_mode = ("cpu_offload", "sequential_cpu_offload") | ||
| def enable_cpu_offload(self, offload_mode: str | None, offload_to_disk:bool = False): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 另外是正交方法的话应该拆成另外一个函数，而不是作为这个函数的参数会更好一些 |
||
| valid_offload_mode = ("cpu_offload", "sequential_cpu_offload", "disable", None) | ||
| if offload_mode not in valid_offload_mode: | ||
| raise ValueError(f"offload_mode must be one of {valid_offload_mode}, but got {offload_mode}") | ||
| if self.device == "cpu" or self.device == "mps": | ||
| logger.warning("must set an non cpu device for pipeline before calling enable_cpu_offload") | ||
| return | ||
| if offload_mode == "cpu_offload": | ||
| if offload_mode is None or offload_mode == "disable": | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 都是在初始化的时候设置的，感觉disable这个选项没啥用呀 |
||
| self._disable_offload() | ||
| elif offload_mode == "cpu_offload": | ||
| self._enable_model_cpu_offload() | ||
| elif offload_mode == "sequential_cpu_offload": | ||
| self._enable_sequential_cpu_offload() | ||
| self.offload_to_disk = offload_to_disk | ||
|
|
||
| def _enable_model_cpu_offload(self): | ||
|
|
||
| def _enable_model_cpu_offload(self): | ||
| for model_name in self.model_names: | ||
| model = getattr(self, model_name) | ||
| if model is not None: | ||
|
|
@@ -253,13 +258,23 @@ def _enable_sequential_cpu_offload(self): | |
| if model is not None: | ||
| enable_sequential_cpu_offload(model, self.device) | ||
| self.offload_mode = "sequential_cpu_offload" | ||
|
|
||
| def _disable_offload(self): | ||
| self.offload_mode = None | ||
| self._offload_param_dict = {} | ||
| for model_name in self.model_names: | ||
| model = getattr(self, model_name) | ||
| if model is not None: | ||
| model.to(self.device) | ||
|
|
||
|
|
||
| def enable_fp8_autocast( | ||
| self, model_names: List[str], compute_dtype: torch.dtype = torch.bfloat16, use_fp8_linear: bool = False | ||
| ): | ||
| for model_name in model_names: | ||
| model = getattr(self, model_name) | ||
| if model is not None: | ||
| model.to(device=self.device, dtype=torch.float8_e4m3fn) | ||
| enable_fp8_autocast(model, compute_dtype, use_fp8_linear) | ||
| self.fp8_autocast_enabled = True | ||
|
|
||
|
|
@@ -282,10 +297,26 @@ def load_models_to_device(self, load_model_names: List[str] | None = None): | |
| # load the needed models to device | ||
| for model_name in load_model_names: | ||
| model = getattr(self, model_name) | ||
| if model is None: | ||
| raise ValueError(f"model {model_name} is not loaded, maybe this model has been destroyed by model_lifecycle_finish function with offload_to_disk=True") | ||
| if model is not None and (p := next(model.parameters(), None)) is not None and p.device.type != self.device: | ||
| model.to(self.device) | ||
| # fresh the cuda cache | ||
| empty_cache() | ||
|
|
||
| def model_lifecycle_finish(self, model_names: List[str] | None = None): | ||
| if not self.offload_to_disk or self.offload_mode is None: | ||
| return | ||
| for model_name in model_names: | ||
| model = getattr(self, model_name) | ||
| del model | ||
| if model_name in self._offload_param_dict: | ||
| del self._offload_param_dict[model_name] | ||
| setattr(self, model_name, None) | ||
| print(f"model {model_name} has been deleted from memory") | ||
| logger.info(f"model {model_name} has been deleted from memory") | ||
| empty_cache() | ||
|
|
||
|
|
||
| def compile(self): | ||
| raise NotImplementedError(f"{self.__class__.__name__} does not support compile") | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
感觉应该换个名字,看了一下实现我觉得跟offload是两个正交的能力,这个名字太困惑了。一般理解上的offload_to_disk是真的在磁盘上存点数据,然后通过一个方法能还原回来,但是现在实现上没有这么一个方法;而且我们的目的是节省一次性运行过程中的内存占用,应该也不需要这样的能力,结合model lifecycle换给名字会更好理解一些