Skip to content

Compare agents1.py #7

@vicks4u

Description

@vicks4u

"""
Compare PPO, A2C, and DQN on MT5 trading environment.

  • Each agent is trained separately
  • Results (mean reward) are logged
    """

import time
import numpy as np
import pandas as pd
import gym
from gym import spaces
import MetaTrader5 as mt5

from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

-------------------------

CONFIG

-------------------------

SYMBOL = "EURUSD"
TIMEFRAME = mt5.TIMEFRAME_M5
LOOKBACK = 50
TRAIN_TIMESTEPS = 20000
N_BARS = 5000
SEED = 42

-------------------------

-------------------------

MT5 Connection

-------------------------

def mt5_connect():
if not mt5.initialize():
raise RuntimeError(f"MT5 init failed: {mt5.last_error()}")
if not mt5.symbol_select(SYMBOL, True):
raise RuntimeError(f"Could not select {SYMBOL}")

def mt5_shutdown():
mt5.shutdown()

def fetch_bars(symbol, timeframe, n_bars):
rates = mt5.copy_rates_from_pos(symbol, timeframe, 0, n_bars)
if rates is None:
raise RuntimeError(f"Failed to fetch data: {mt5.last_error()}")
df = pd.DataFrame(rates)
df['time'] = pd.to_datetime(df['time'], unit='s')
return df

-------------------------

Custom Gym Env

-------------------------

class MT5TradingEnv(gym.Env):
def init(self, df, lookback=LOOKBACK):
super().init()
self.df = df.reset_index(drop=True)
self.lookback = lookback
self.ptr = lookback
self.position = 0
self.entry_price = 0
self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
shape=(lookback+1,), dtype=np.float32)
self.action_space = spaces.Discrete(3) # 0=hold, 1=buy, 2=sell

def _get_obs(self):
    closes = self.df.loc[self.ptr-self.lookback:self.ptr-1, "close"].values.astype(np.float32)
    norm = closes / (closes[-1] + 1e-9) - 1.0
    return np.concatenate([norm, [float(self.position)]], axis=0)

def reset(self):
    self.ptr = self.lookback
    self.position = 0
    self.entry_price = 0
    return self._get_obs()

def step(self, action):
    done, reward = False, 0
    price = float(self.df.loc[self.ptr, "close"])
    if action == 1:  # buy
        if self.position == 0:
            self.position, self.entry_price = 1, price
        elif self.position == -1:
            reward += (self.entry_price - price)
            self.position, self.entry_price = 1, price
    elif action == 2:  # sell
        if self.position == 0:
            self.position, self.entry_price = -1, price
        elif self.position == 1:
            reward += (price - self.entry_price)
            self.position, self.entry_price = -1, price

    self.ptr += 1
    if self.ptr >= len(self.df):
        done = True
    else:
        next_price = float(self.df.loc[self.ptr, "close"])
        if self.position == 1:
            reward += (next_price - self.entry_price) * 0.1
        elif self.position == -1:
            reward += (self.entry_price - next_price) * 0.1

    obs = self._get_obs() if not done else np.zeros(self.observation_space.shape, dtype=np.float32)
    return obs, float(reward), done, {}

-------------------------

Training & Evaluation

-------------------------

def run_comparison():
mt5_connect()
df = fetch_bars(SYMBOL, TIMEFRAME, N_BARS)
mt5_shutdown()

results = {}
agents = {
    "PPO": PPO,
    "A2C": A2C,
    "DQN": DQN
}

for name, algo in agents.items():
    print(f"\n=== Training {name} ===")
    env = DummyVecEnv([lambda: MT5TradingEnv(df, lookback=LOOKBACK)])
    model = algo("MlpPolicy", env, verbose=0, seed=SEED)
    model.learn(total_timesteps=TRAIN_TIMESTEPS)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)
    results[name] = (mean_reward, std_reward)
    print(f"{name} → mean reward: {mean_reward:.2f}, std: {std_reward:.2f}")

print("\n=== Summary ===")
for k, v in results.items():
    print(f"{k}: mean {v[0]:.2f}, std {v[1]:.2f}")

if name == "main":
run_comparison()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions