Skip to content

Commit

Permalink
tests: switch sensitive amd gpu tests to safe math
Browse files Browse the repository at this point in the history
  • Loading branch information
mloubout committed Feb 4, 2025
1 parent 993808b commit 3bf89eb
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 27 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/docker-bases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ jobs:
file: './docker/Dockerfile.amd'
push: true
target: 'amdclang'
build-args: |
ROCM_VERSION=5.5.1
UCX_BRANCH=v1.13.1
OMPI_BRANCH=v4.1.4
tags: devitocodes/bases:amd

- name: AMD HIP image
Expand All @@ -231,6 +235,4 @@ jobs:
file: './docker/Dockerfile.amd'
push: true
target: 'hip'
build-args: |
arch=hip
tags: devitocodes/bases:amd-hip
2 changes: 1 addition & 1 deletion devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ def __init_finalize__(self, **kwargs):
if language in ['C', 'openmp']:
self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
self.ldflags += ['-fopenmp']
self.ldflags += ['--offload-arch=native']
self.ldflags += ['--offload-arch=%s' % platform.march]
elif platform in [POWER8, POWER9]:
# It doesn't make much sense to use AOMP on Power, but it should work
self.cflags.append('-mcpu=native')
Expand Down
4 changes: 4 additions & 0 deletions docker/Dockerfile.amd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ ENV ROCM_HOME=/opt/rocm \
UCX_HOME=/opt/ucx \
OMPI_HOME=/opt/ompi

# Adding ROCM
ENV PATH=$ROCM_HOME/bin:$PATH \
LD_LIBRARY_PATH=$ROCM_HOME/lib:$ROCM_HOME/lib/llvm/lib:$LD_LIBRARY_PATH

# Until rocm base has it fixed
RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch | echo "offload-arch already exis"

Expand Down
48 changes: 26 additions & 22 deletions docker/Dockerfile.devito
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,46 @@ ARG USER_ID=1000
ARG GROUP_ID=1000

################## Install devito ############################################
# Copy Devito
ADD . /app/devito

# Update if outdated
RUN apt-get update
# Update if outdated and install extras
RUN apt-get update && \
apt-get install -y git cmake libncurses5-dev libncursesw5-dev libdrm-dev libsystemd-dev

# Remove git files
RUN rm -rf /app/devito/.git
# Useful utilities
# Nvtop
RUN git clone https://github.com/Syllo/nvtop.git /app/nvtop && \
mkdir -p /app/nvtop/build && cd /app/nvtop/build && \
cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON && \
make && make install

# Install pip dependencies and devito as a pip package
# Install pip dependencies
RUN python3 -m venv /venv && \
/venv/bin/pip install --no-cache-dir --upgrade pip && \
/venv/bin/pip install --no-cache-dir jupyter && \
/venv/bin/pip install --no-cache-dir wheel && \
eval "$MPI4PY_FLAGS /venv/bin/pip install --no-cache-dir -r /app/devito/requirements-mpi.txt" && \
/venv/bin/pip install --no-cache-dir -e /app/devito[extras,tests] && \
rm -rf ~/.cache/pip

# Useful utilities
# Nvtop
RUN apt-get install -y git cmake libncurses5-dev libncursesw5-dev libdrm-dev libsystemd-dev cmake && \
git clone https://github.com/Syllo/nvtop.git /app/nvtop && \
mkdir -p /app/nvtop/build && cd /app/nvtop/build && \
cmake .. -DNVIDIA_SUPPORT=ON -DAMDGPU_SUPPORT=ON -DINTEL_SUPPORT=ON && \
make && \
make install && \
ln -fs /app/nvtop/build/src/nvtop /venv/bin/nvtop

# Safety cleanup
RUN apt-get clean && apt-get autoclean && apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/*
# Copy Devito
ADD . /app/devito

# Remove git files
RUN rm -rf /app/devito/.git

# Mpi4py
RUN eval "$MPI4PY_FLAGS /venv/bin/pip install --no-cache-dir --verbose -r /app/devito/requirements-mpi.txt"

# Devito
RUN /venv/bin/pip install --no-cache-dir -e /app/devito[extras,tests] && rm -rf ~/.cache/pip

FROM $base as user
# COPY is faster than RUN chown by an order of magnitude, so we have a final step that
# just copies the built image into the user stage.

# Last installs (such as gdb needed in user mode) and cleanup
RUN apt-get update && apt install gdb -y && \
apt-get clean && apt-get autoclean && apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/*

# User/Group Ids
ARG USER_ID=1000
ARG GROUP_ID=1000
Expand Down
2 changes: 0 additions & 2 deletions tests/test_gpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,6 @@ def test_streaming_multi_input(self, opt, ntmps):

assert np.all(grad.data == grad1.data)

@switchconfig(safe_math=True)
def test_streaming_multi_input_conddim_foward(self):
nt = 10
grid = Grid(shape=(4, 4))
Expand Down Expand Up @@ -672,7 +671,6 @@ def test_streaming_multi_input_conddim_foward(self):

assert np.all(v.data == v1.data)

@switchconfig(safe_math=True)
def test_streaming_multi_input_conddim_backward(self):
nt = 10
grid = Grid(shape=(4, 4))
Expand Down

0 comments on commit 3bf89eb

Please sign in to comment.