Skip to content

[CI] Fix gym rendering issues #2995

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/unittest/linux_libs/scripts_gym/10_nvidia.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"file_format_version" : "1.0.0",
"ICD" : {
"library_path" : "libEGL_nvidia.so.0"
}
}
14 changes: 12 additions & 2 deletions .github/unittest/linux_libs/scripts_gym/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,22 @@ lib_dir="${env_dir}/lib"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir
export MKL_THREADING_LAYER=GNU

# Start Xvfb with specific OpenGL configuration
export DISPLAY=:99
Xvfb :99 -screen 0 1400x900x24 -ac +extension GLX +render -noreset > /dev/null 2>&1 &
sleep 3 # Give Xvfb time to start

# Verify OpenGL/EGL setup
glxinfo -B || true
echo "EGL_PLATFORM=$EGL_PLATFORM"
echo "MUJOCO_GL=$MUJOCO_GL"
echo "PYOPENGL_PLATFORM=$PYOPENGL_PLATFORM"

# Run the tests
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym'

unset LD_PRELOAD
export DISPLAY=:99
Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 -k "gym and not isaac" --error-for-skips --mp_fork
coverage combine
coverage xml -i
30 changes: 23 additions & 7 deletions .github/unittest/linux_libs/scripts_gym/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,28 @@
#
# Do not install PyTorch and torchvision here, otherwise they also get cached.

unset PYTORCH_VERSION
# For unit tests, nightly PyTorch is used (see the following section),
# so there is no need to set PYTORCH_VERSION.
# In fact, keeping PYTORCH_VERSION would force us to hardcode the PyTorch version in the config.
apt-get update && apt-get install -y \
git wget gcc g++ \
libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev \
libglvnd0 libgl1 libglx0 libegl1 libgles2 \
xvfb libegl-dev libx11-dev freeglut3-dev \
mesa-utils mesa-common-dev \
libsdl2-dev libsdl2-2.0-0

set -e
set -v

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Avoid error: "fatal: unsafe repository"
apt-get update && apt-get install -y git wget gcc g++
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev

# Setup EGL
mkdir -p /usr/share/glvnd/egl_vendor.d
cp $this_dir/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json

# Avoid error: "fatal: unsafe repository"
git config --global --add safe.directory '*'
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
Expand Down Expand Up @@ -79,13 +93,15 @@ conda env config vars set \
SDL_VIDEODRIVER=dummy \
DISPLAY=:99 \
PYOPENGL_PLATFORM=egl \
LD_PRELOAD=$glew_path \
__GLX_VENDOR_LIBRARY_NAME=nvidia \
MESA_GL_VERSION_OVERRIDE=3.3 \
EGL_PLATFORM=x11 \
LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/x86_64-linux-gnu/libGL.so \
NVIDIA_PATH=/usr/src/nvidia-470.63.01 \
MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \
MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \
TOKENIZERS_PARALLELISM=true
# LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin

# Re-activate the environment so the variables set above take effect
conda deactivate && conda activate "${env_dir}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies:
- mujoco
- patchelf
- pyopengl==3.1.4
- pyglet<1.5.0
- ray
- av
- h5py
15 changes: 11 additions & 4 deletions .github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,20 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries
export MKL_THREADING_LAYER=GNU
export BATCHED_PIPE_TIMEOUT=60

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym'

# Start Xvfb with specific OpenGL configuration
export DISPLAY=:99
Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
sleep 3 # Give Xvfb time to start

# Verify OpenGL setup
glxinfo -B || true
echo "MUJOCO_GL=$MUJOCO_GL"
echo "PYOPENGL_PLATFORM=$PYOPENGL_PLATFORM"

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200
python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym'

CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest \
MUJOCO_GL=osmesa python .github/unittest/helpers/coverage_run_parallel.py -m pytest \
--instafail -v \
--durations 200 \
--ignore test/test_distributed.py \
Expand Down
75 changes: 56 additions & 19 deletions .github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,63 @@
set -e
set -v

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

apt-get update && apt-get upgrade -y
printf "* Installing vim - git - wget\n"
apt-get install -y vim git wget
# Make apt-get non-interactive
export DEBIAN_FRONTEND=noninteractive
# Pre-configure timezone data
ln -fs /usr/share/zoneinfo/UTC /etc/localtime
echo "UTC" > /etc/timezone

printf "* Installing glfw - glew - osmesa part 1\n"
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libx11-dev libegl-dev
this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

#printf "* Installing glfw - glew - osmesa part 2\n"
#apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0
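# Install prerequisites (software-properties-common, wget, CA certificates);
# NOTE(review): no NVIDIA repository is actually added in the lines that follow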
apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
wget \
ca-certificates

# Install basic build tools first
apt-get install -y vim git wget build-essential

# Install system libraries to fix version conflicts
apt-get install -y --no-install-recommends \
libffi7 \
libffi-dev \
libtinfo6 \
libtinfo-dev \
libncurses5-dev \
libncursesw5-dev

# Install OpenGL packages with focus on OSMesa
apt-get install -y --no-install-recommends \
libosmesa6-dev \
libgl1-mesa-dev \
libgl1-mesa-glx \
libglew-dev \
libglfw3-dev \
libglvnd0 \
libgl1 \
libglx0 \
libegl1 \
libgles2 \
xvfb \
mesa-utils \
mesa-common-dev \
libglu1-mesa-dev \
libsdl2-dev \
libsdl2-2.0-0 \
pkg-config

if [ "${CU_VERSION:-}" == cpu ] ; then
# Fixes the "version `GLIBCXX_3.4.29' not found" error raised for tensorboard
# apt-get install -y gcc-4.9
apt-get upgrade -y libstdc++6
apt-get dist-upgrade -y
else
apt-get install -y g++ gcc
fi

# Remove conflicting libraries from conda environment if they exist
rm -f "${env_dir}/lib/libtinfo.so"* || true
rm -f "${env_dir}/lib/libffi.so"* || true

git config --global --add safe.directory '*'
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
Expand Down Expand Up @@ -93,19 +129,20 @@ printf "* Installing dependencies (except PyTorch)\n"
echo " - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
cat "${this_dir}/environment.yml"

export MUJOCO_GL=egl
# Use OSMesa for rendering
export MUJOCO_GL=osmesa
conda env config vars set \
MAX_IDLE_COUNT=1000 \
MUJOCO_GL=egl \
MUJOCO_GL=osmesa \
SDL_VIDEODRIVER=dummy \
DISPLAY=unix:0.0 \
PYOPENGL_PLATFORM=egl \
LD_PRELOAD=$glew_path \
NVIDIA_PATH=/usr/src/nvidia-470.63.01 \
DISPLAY=:99 \
PYOPENGL_PLATFORM=osmesa \
LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libOSMesa.so.6:/usr/lib/x86_64-linux-gnu/libGL.so \
MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \
MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \
TOKENIZERS_PARALLELISM=true
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \
TOKENIZERS_PARALLELISM=true \
PYGLET_GRAPHICS=opengl3

# Re-activate the environment so the variables set above take effect
conda deactivate && conda activate "${env_dir}"
Expand Down
2 changes: 1 addition & 1 deletion test/_utils_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def get_available_devices():
def get_default_devices():
num_cuda = torch.cuda.device_count()
if num_cuda == 0:
# if torch.mps.is_available():
# if getattr(torch.mps, "is_available", lambda: False)():
# return [torch.device("mps:0")]
return [torch.device("cpu")]
elif num_cuda == 1:
Expand Down
5 changes: 3 additions & 2 deletions test/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ def test_no_deepcopy_policy(self, collector_type):
shared_device = torch.device("cpu")
if torch.cuda.is_available():
original_device = torch.device("cuda:0")
elif torch.mps.is_available():
elif getattr(torch.mps, "is_available", lambda: False)():
original_device = torch.device("mps")
else:
pytest.skip("No GPU or MPS device")
Expand Down Expand Up @@ -2614,7 +2614,8 @@ def test_multi_collector_consistency(


@pytest.mark.skipif(
not torch.cuda.is_available() and not torch.mps.is_available(),
not torch.cuda.is_available()
and not getattr(torch.mps, "is_available", lambda: False)(),
reason="No casting if no cuda",
)
class TestUpdateParams:
Expand Down
2 changes: 1 addition & 1 deletion torchrl/data/replay_buffers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,6 @@ def tree_iter(pytree): # noqa: F811
def _auto_device() -> torch.device:
if torch.cuda.is_available():
return torch.device("cuda:0")
elif torch.mps.is_available():
elif getattr(torch.mps, "is_available", lambda: False)():
return torch.device("mps:0")
return torch.device("cpu")
Loading