Open
Description
#这是你的源代码
def __update(self, value: float):
    """Fold one new sample into this node's statistics.

    Updates the visit count ``N(s, a)`` and the running average
    reward ``Q(s, a)`` stored on the node.

    Parameters
    ----------
    value: float
        The sample used to update the node's internal data.
    """
    visits_after = self.N + 1
    # Rebuild the previous sum (N * Q), add the new sample, then
    # divide by the new count to get the updated mean.
    self.Q = (self.N * self.Q + value) / visits_after
    self.N = visits_after
def backup(self, value: float):
    """Back-propagate ``value`` from this node up to the root.

    Ancestors are updated first (root downwards), then this node.

    Parameters
    ----------
    value: float
        The value recorded on this node; each level above receives
        it with the sign flipped.
    """
    ancestor = self.parent
    if ancestor:
        # The sign flips at every level. NOTE(review): presumably this
        # is the usual two-player zero-sum convention, where adjacent
        # tree levels belong to opposing players, so a result worth
        # +v to one side is worth -v to the other -- confirm against
        # the game/search code.
        ancestor.backup(-value)
    self.__update(value)
Thanks in advance!
Metadata
Assignees
Labels
No labels
Activity