feature(pu): add uz finetune config

opendilab · puyuan1996 · Jul 5, 2024 · Jul 8, 2024 · Jul 11, 2024 · Jul 11, 2024
commit 4b195eb58071ceddf856125ad2bc35936a4f2725
diff --git a/lzero/policy/unizero_multitask.py b/lzero/policy/unizero_multitask.py
@@ -1,7 +1,7 @@
 import copy
 import sys
 from collections import defaultdict
-from typing import List, Dict, Tuple, Union
+from typing import List, Dict, Any, Tuple, Union
 
 import numpy as np
 import torch
@@ -1224,3 +1224,102 @@ def recompute_pos_emb_diff_and_clear_cache(self) -> None:
             model.world_model.precompute_pos_emb_diff_kv()
             model.world_model.clear_caches()
         torch.cuda.empty_cache()
+
+    def _state_dict_learn(self) -> Dict[str, Any]:
+        """
+        Overview:
+            Collect the learn-mode state of the policy so that it can be saved and restored later.
+        Returns:
+            - state_dict (:obj:`Dict[str, Any]`): Keys are ``model``, ``target_model`` and ``optimizer_world_model``.
+        """
+        # Insertion order: learn model, then target model, then the world-model optimizer.
+        learn_state = {'model': self._learn_model.state_dict()}
+        learn_state['target_model'] = self._target_model.state_dict()
+        learn_state['optimizer_world_model'] = self._optimizer_world_model.state_dict()
+        return learn_state
+
+    # def _load_state_dict_learn(self, state_dict: Dict[str, Any]) -> None:
+    #     """
+    #     Overview:
+    #         Load the state_dict variable into policy learn mode.
+    #     Arguments:
+    #         - state_dict (:obj:`Dict[str, Any]`): The dict of policy learn state saved before.
+    #     """
+    #     self._learn_model.load_state_dict(state_dict['model'])
+    #     self._target_model.load_state_dict(state_dict['target_model'])
+    #     self._optimizer_world_model.load_state_dict(state_dict['optimizer_world_model'])
+
+    # TODO(review): the loader below skips multi-task parameters; the plain version is kept commented out above.
+    def _load_state_dict_learn(self, state_dict: Dict[str, Any]) -> None:
+        """
+        Overview:
+            Load a saved learn-mode state into the policy while skipping the multi-task specific parameters
+            (the per-task policy/value/reward/observation heads and the task embedding).
+        Arguments:
+            - state_dict (:obj:`Dict[str, Any]`): The dict of policy learn state saved previously. Each of the
+                keys ``model``, ``target_model`` and ``optimizer_world_model`` is optional; an absent key is
+                reported with ``print`` and skipped.
+        .. note::
+            Both models are loaded with ``strict=False``, so missing or unexpected keys are printed instead
+            of raising. A failure while restoring the optimizer is also printed and does not abort loading.
+        """
+        # Sub-module prefixes, relative to the model root, whose parameters must not be restored.
+        excluded_modules = (
+            'world_model.head_policy_multi_task.',
+            'world_model.head_value_multi_task.',
+            'world_model.head_rewards_multi_task.',
+            'world_model.head_observations_multi_task.',
+            'world_model.task_emb.',
+        )
+        # The ``_orig_mod.`` spelling is presumably what a ``torch.compile``-wrapped model writes into its
+        # state_dict keys (confirm against the checkpoints in use). The bare spelling is accepted as well so
+        # that a checkpoint saved from a non-wrapped model is filtered in the same way.
+        exclude_prefixes = tuple('_orig_mod.' + name for name in excluded_modules) + excluded_modules
+
+        def filter_state_dict(state_dict_loader: Dict[str, Any]) -> Dict[str, Any]:
+            """
+            Overview:
+                Build a new dict holding every entry of ``state_dict_loader`` whose key does not start with
+                one of ``exclude_prefixes``. The input dict is left untouched.
+            """
+            filtered = {}
+            for k, v in state_dict_loader.items():
+                # ``str.startswith`` accepts a tuple, so one call tests every prefix.
+                if k.startswith(exclude_prefixes):
+                    print(f"Excluding parameter: {k}")  # Debug aid: shows which parameters are skipped.
+                    continue
+                filtered[k] = v
+            return filtered
+
+        # Restore the learn model and the target model in the same way: filter, load non-strictly, report.
+        # The attribute is looked up only when its key is present in ``state_dict``.
+        model_entries = (
+            ('model', '_learn_model'),
+            ('target_model', '_target_model'),
+        )
+        for key, attr_name in model_entries:
+            if key not in state_dict:
+                print(f"No '{key}' key found in the state_dict.")
+                continue
+            filtered_state = filter_state_dict(state_dict[key])
+            missing_keys, unexpected_keys = getattr(self, attr_name).load_state_dict(
+                filtered_state, strict=False
+            )
+            if missing_keys:
+                print(f"Missing keys when loading {attr_name}: {missing_keys}")
+            if unexpected_keys:
+                print(f"Unexpected keys when loading {attr_name}: {unexpected_keys}")
+
+        # NOTE(review): the optimizer state is restored unfiltered even though some model parameters were
+        # skipped above. Whether that state still lines up with the current parameters depends on the
+        # checkpoint — confirm for the fine-tuning setup in use.
+        if 'optimizer_world_model' in state_dict:
+            try:
+                self._optimizer_world_model.load_state_dict(state_dict['optimizer_world_model'])
+            except Exception as e:
+                # Deliberate best effort: report the failure and keep whatever state the optimizer has.
+                print(f"Error loading optimizer state_dict: {e}")
+        else:
+            print("No 'optimizer_world_model' key found in the state_dict.")
+
+        # Other saved components (e.g. a scheduler) could be restored here if needed.