Open
Description
When I run the following command:
python examples/train_task.py --algo_name=mopo --exp_name=halfcheetah --task HalfCheetah-v3 --task_data_type low --task_train_num 2
It fails with the following traceback:
File "examples/train_task.py", line 19, in <module>
fire.Fire(run_algo)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "examples/train_task.py", line 16, in run_algo
algo_trainer.train(train_buffer, val_buffer, callback_fn=callback)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/algo/modelbase/mopo.py", line 94, in train
self.train_policy(train_buffer, val_buffer, self.transition, callback_fn)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/algo/modelbase/mopo.py", line 206, in train_policy
res = callback_fn(self.get_policy())
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/evaluation/__init__.py", line 80, in __call__
eval_res.update(test_on_real_env(policy, self.env, number_of_runs=self.number_of_runs))
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/evaluation/neorl.py", line 54, in test_on_real_env
results = [test_one_trail_sp_local(env, policy) for _ in range(number_of_runs)]
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/evaluation/neorl.py", line 54, in <listcomp>
results = [test_one_trail_sp_local(env, policy) for _ in range(number_of_runs)]
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/evaluation/neorl.py", line 39, in test_one_trail_sp_local
action = policy.get_action(state).reshape(-1, act_dim)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/utils/net/common.py", line 33, in get_action
act = to_array_as(self.policy_infer(obs_tensor), obs)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/utils/net/tanhpolicy.py", line 164, in policy_infer
return self(obs).mode
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/utils/net/tanhpolicy.py", line 147, in forward
logits, h = self.preprocess(obs, state)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/media/lksgcc/new_disk/lk_git/3_Reinforcement_Learning/3_2_Offline_Learning/OfflineRL/offlinerl/utils/net/common.py", line 113, in forward
logits = self.model(s)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/container.py", line 141, in forward
input = module(input)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/lksgcc/.pyenv/versions/anaconda3-5.0.1/envs/mujoco_py/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (18x1 and 18x256)
Other algorithms also fail with the same error. Thanks in advance for helping to resolve this problem!
Metadata
Metadata
Assignees
Labels
No labels
Activity