Description
"""
Compare PPO, A2C, and DQN on an MT5 trading environment.
- Each agent is trained separately
- Results (mean reward) are logged
"""
import numpy as np
import pandas as pd
import gym
from gym import spaces
import MetaTrader5 as mt5
from stable_baselines3 import PPO, A2C, DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# -------------------------
# CONFIG
# -------------------------
SYMBOL = "EURUSD"
TIMEFRAME = mt5.TIMEFRAME_M5
LOOKBACK = 50
TRAIN_TIMESTEPS = 20000
N_BARS = 5000
SEED = 42

# -------------------------
# MT5 Connection
# -------------------------
def mt5_connect():
    if not mt5.initialize():
        raise RuntimeError(f"MT5 init failed: {mt5.last_error()}")
    if not mt5.symbol_select(SYMBOL, True):
        raise RuntimeError(f"Could not select {SYMBOL}")

def mt5_shutdown():
    mt5.shutdown()

def fetch_bars(symbol, timeframe, n_bars):
    rates = mt5.copy_rates_from_pos(symbol, timeframe, 0, n_bars)
    if rates is None:
        raise RuntimeError(f"Failed to fetch data: {mt5.last_error()}")
    df = pd.DataFrame(rates)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    return df
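
# Optional quick look at the data (an illustrative sketch, not part of the original
# script): connect, pull a few bars, and print the tail to confirm the expected
# columns (time/open/high/low/close/tick_volume) before training.
# `preview_bars` is a hypothetical helper name.
def preview_bars(n=10):
    mt5_connect()
    try:
        print(fetch_bars(SYMBOL, TIMEFRAME, n).tail())
    finally:
        mt5_shutdown()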

# -------------------------
# Custom Gym Env
# -------------------------
class MT5TradingEnv(gym.Env):
    def __init__(self, df, lookback=LOOKBACK):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.lookback = lookback
        self.ptr = lookback
        self.position = 0
        self.entry_price = 0
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(lookback + 1,), dtype=np.float32)
        self.action_space = spaces.Discrete(3)  # 0=hold, 1=buy, 2=sell

    def _get_obs(self):
        closes = self.df.loc[self.ptr - self.lookback:self.ptr - 1, "close"].values.astype(np.float32)
        norm = closes / (closes[-1] + 1e-9) - 1.0  # prices relative to the most recent close
        return np.concatenate([norm, [float(self.position)]], axis=0)

    def reset(self):
        self.ptr = self.lookback
        self.position = 0
        self.entry_price = 0
        return self._get_obs()

    def step(self, action):
        done, reward = False, 0.0
        price = float(self.df.loc[self.ptr, "close"])
        if action == 1:  # buy
            if self.position == 0:
                self.position, self.entry_price = 1, price
            elif self.position == -1:  # close short, open long
                reward += (self.entry_price - price)
                self.position, self.entry_price = 1, price
        elif action == 2:  # sell
            if self.position == 0:
                self.position, self.entry_price = -1, price
            elif self.position == 1:  # close long, open short
                reward += (price - self.entry_price)
                self.position, self.entry_price = -1, price
        self.ptr += 1
        if self.ptr >= len(self.df):
            done = True
        else:
            next_price = float(self.df.loc[self.ptr, "close"])
            if self.position == 1:
                reward += (next_price - self.entry_price) * 0.1
            elif self.position == -1:
                reward += (self.entry_price - next_price) * 0.1
        obs = self._get_obs() if not done else np.zeros(self.observation_space.shape, dtype=np.float32)
        return obs, float(reward), done, {}
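
# Optional env sanity check (a minimal sketch, not in the original script):
# stable-baselines3 bundles an environment checker that exercises reset/step and
# validates the observation/action spaces. This assumes an SB3 version that still
# accepts gym-style (4-tuple step) environments; `sanity_check_env` is a
# hypothetical helper name.
def sanity_check_env(df):
    from stable_baselines3.common.env_checker import check_env
    check_env(MT5TradingEnv(df, lookback=LOOKBACK), warn=True)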

# -------------------------
# Training & Evaluation
# -------------------------
def run_comparison():
    mt5_connect()
    df = fetch_bars(SYMBOL, TIMEFRAME, N_BARS)
    mt5_shutdown()

    results = {}
    agents = {
        "PPO": PPO,
        "A2C": A2C,
        "DQN": DQN,
    }

    for name, algo in agents.items():
        print(f"\n=== Training {name} ===")
        env = DummyVecEnv([lambda: MT5TradingEnv(df, lookback=LOOKBACK)])
        model = algo("MlpPolicy", env, verbose=0, seed=SEED)
        model.learn(total_timesteps=TRAIN_TIMESTEPS)
        mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)
        results[name] = (mean_reward, std_reward)
        print(f"{name} → mean reward: {mean_reward:.2f}, std: {std_reward:.2f}")

    print("\n=== Summary ===")
    for k, v in results.items():
        print(f"{k}: mean {v[0]:.2f}, std {v[1]:.2f}")

if __name__ == "__main__":
    run_comparison()
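
# Usage sketch (assumptions, not from the original): requires a running, logged-in
# MetaTrader 5 terminal on the same Windows machine, plus roughly
#   pip install MetaTrader5 stable-baselines3 pandas numpy
# Note that stable-baselines3 >= 2.0 expects gymnasium-style envs; the gym-style
# reset/step signatures used above target the 1.x line.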