Create rewards.py

redd-rl · web-flow · commit 61d31c7a760d · 2025-09-01T16:15:46.000+02:00
this is needed to prevent clutter
diff --git a/rewards.py b/rewards.py
@@ -0,0 +1,139 @@
+from typing import List, Dict, Any
+from rlgym.api import RewardFunction, AgentID
+from rlgym.rocket_league.api import GameState
+from rlgym.rocket_league import common_values
+import numpy as np
+
+class AdvancedTouchReward(RewardFunction[AgentID, GameState, float]):
+    def __init__(self, touch_reward: float = 0.0, acceleration_reward: float = 1, use_touch_count: bool = False):
+        self.touch_reward = touch_reward
+        self.acceleration_reward = acceleration_reward
+        self.use_touch_count = use_touch_count
+
+        self.prev_ball = None
+
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        self.prev_ball = initial_state.ball
+
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        rewards = {agent: 0 for agent in agents}
+        ball = state.ball
+        for agent in agents:
+            touches = state.cars[agent].ball_touches
+
+            if touches > 0:
+                if not self.use_touch_count:
+                    touches = 1
+                acceleration = np.linalg.norm(ball.linear_velocity - self.prev_ball.linear_velocity) / BALL_MAX_SPEED
+                rewards[agent] += self.touch_reward * touches
+                rewards[agent] += acceleration * self.acceleration_reward
+
+        self.prev_ball = ball
+
+        return rewards
+
+class FaceBallReward(RewardFunction):
+    """Rewards the agent for facing the ball"""
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        pass
+
+
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        rewards = {}
+
+        for agent in agents:
+            car = state.cars[agent]
+            ball = state.ball
+
+            car_pos = car.physics.position
+            ball_pos = ball.position
+            direction_to_ball = ball_pos - car_pos
+            norm = np.linalg.norm(direction_to_ball)
+
+            if norm > 0:
+                direction_to_ball /= norm
+
+            car_forward = car.physics.forward
+            dot_product = np.dot(car_forward, direction_to_ball)
+
+            reward = dot_product  # Dot product directly indicates alignment (-1 to 1)
+            rewards[agent] = reward
+
+        return rewards
+                        
+class SpeedTowardBallReward(RewardFunction[AgentID, GameState, float]):
+    """Rewards the agent for moving quickly toward the ball"""
+    
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        pass
+    
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        rewards = {}
+        for agent in agents:
+            car = state.cars[agent]
+            car_physics = car.physics if car.is_orange else car.inverted_physics
+            ball_physics = state.ball if car.is_orange else state.inverted_ball
+            player_vel = car_physics.linear_velocity
+            pos_diff = (ball_physics.position - car_physics.position)
+            dist_to_ball = np.linalg.norm(pos_diff)
+            dir_to_ball = pos_diff / dist_to_ball
+
+            speed_toward_ball = np.dot(player_vel, dir_to_ball)
+
+            rewards[agent] = max(speed_toward_ball / common_values.CAR_MAX_SPEED, 0.0)
+        return rewards
+
+class InAirReward(RewardFunction[AgentID, GameState, float]):
+    """Rewards the agent for being in the air"""
+    
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        pass
+    
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        return {agent: float(not state.cars[agent].on_ground) for agent in agents}
+
+class VelocityBallToGoalReward(RewardFunction[AgentID, GameState, float]):
+    """Rewards the agent for hitting the ball toward the opponent's goal"""
+    
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        pass
+    
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        rewards = {}
+        for agent in agents:
+            car = state.cars[agent]
+            ball = state.ball
+            if car.is_orange:
+                goal_y = -common_values.BACK_NET_Y
+            else:
+                goal_y = common_values.BACK_NET_Y
+
+            ball_vel = ball.linear_velocity
+            pos_diff = np.array([0, goal_y, 0]) - ball.position
+            dist = np.linalg.norm(pos_diff)
+            dir_to_goal = pos_diff / dist
+            
+            vel_toward_goal = np.dot(ball_vel, dir_to_goal)
+            rewards[agent] = max(vel_toward_goal / common_values.BALL_MAX_SPEED, 0)
+        return rewards
+
+
+class TouchReward(RewardFunction[AgentID, GameState, float]):
+    """
+    A RewardFunction that gives a reward of 1 if the agent touches the ball, 0 otherwise.
+    """
+
+    def reset(self, agents: List[AgentID], initial_state: GameState, shared_info: Dict[str, Any]) -> None:
+        pass
+
+    def get_rewards(self, agents: List[AgentID], state: GameState, is_terminated: Dict[AgentID, bool],
+                    is_truncated: Dict[AgentID, bool], shared_info: Dict[str, Any]) -> Dict[AgentID, float]:
+        return {agent: self._get_reward(agent, state) for agent in agents}
+
+    def _get_reward(self, agent: AgentID, state: GameState) -> float:
+        return 1. if state.cars[agent].ball_touches > 0 else 0.