-
Notifications
You must be signed in to change notification settings - Fork 1
Integrations
Abhishek Gahlot edited this page Mar 27, 2026
·
1 revision
Drop-in reward functions for the big RL training frameworks.
Module: deepgym.integrations.trl
from deepgym.integrations.trl import make_trl_reward_fn
from deepgym import load_environment
from trl import GRPOTrainer
env = load_environment('coin_change')
reward_fn = make_trl_reward_fn(env)
trainer = GRPOTrainer(
model='Qwen/Qwen2-0.5B-Instruct',
reward_funcs=[reward_fn],
train_dataset=dataset,
)
trainer.train()

from deepgym.integrations.trl import make_trl_async_reward_fn
async_reward_fn = make_trl_async_reward_fn(env)
trainer = GRPOTrainer(
model='Qwen/Qwen2-0.5B-Instruct',
reward_funcs=[async_reward_fn],
train_dataset=dataset,
)

TRL expects: (completions: list[str], **kwargs) -> list[float]
Module: deepgym.integrations.verl
from deepgym.integrations.verl import make_verl_compute_score
compute_score = make_verl_compute_score(env)
# In your verl reward config: compute_score_func = compute_score

verl expects: (data_source, solution_str, ground_truth, extra_info=None) -> float
from deepgym.integrations.verl import make_verl_reward_fn
reward_fn = make_verl_reward_fn(env)
scores = reward_fn({'responses': ['code1', 'code2', 'code3']})

Module: deepgym.integrations.openrlhf
Spin up a FastAPI reward server:
from fastapi import FastAPI
from deepgym import load_environment
from deepgym.core import DeepGym
from deepgym.integrations.openrlhf import create_openrlhf_router
app = FastAPI()
env = load_environment('coin_change')
dg = DeepGym(mode='local')
app.include_router(create_openrlhf_router(env, dg))

Run the server: uvicorn app:app --port 8000

Endpoint:
POST /reward/score
{"prompts": ["..."], "outputs": ["code1", "code2"]}
-> {"rewards": [0.85, 0.0]}
Module: deepgym.integrations.lm_eval
from deepgym.integrations.lm_eval import register_deepgym_tasks
register_deepgym_tasks()  # registers all built-in envs as deepgym_* tasks

lm_eval --model hf \
--model_args pretrained=Qwen/Qwen2-0.5B-Instruct \
--tasks deepgym_coin_change,deepgym_two_sum \
--num_fewshot 0

Metrics: deepgym_score (float 0-1), deepgym_pass (binary 0/1)
Module: deepgym.integrations.hf
from deepgym.integrations.hf import push_environment_to_hub
push_environment_to_hub(
env,
repo_id='your-org/deepgym-coin-change',
env_name='coin_change',
private=False,
token='hf_...',
)

from deepgym.integrations.hf import load_environment_from_hub
env = load_environment_from_hub('your-org/deepgym-coin-change')
result = dg.run(env, solution)

from deepgym.integrations.hf import push_results_to_hub
push_results_to_hub(results_dict, repo_id='your-org/deepgym-leaderboard')

Module: deepgym.integrations.reward
Framework-agnostic, works with anything:
from deepgym.integrations.reward import RewardFunction, AsyncRewardFunction
# sync
reward_fn = RewardFunction(env, max_parallel=10)
scores = reward_fn(['solution1', 'solution2']) # [0.8, 1.0]
batch = reward_fn.call_with_details(['...']) # BatchResult
shaped = reward_fn.shaped_rewards(['...']) # [{'correctness': 0.8}]
per_test = reward_fn.per_test_rewards(['...']) # [{'test_0': 1.0, 'overall': 0.75}]
# async
async_fn = AsyncRewardFunction(env, max_parallel=10)
scores = await async_fn(['...'])