forked from Ensembl/ensembl-vep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
229 lines (204 loc) · 8.88 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Branch of the Ensembl repositories to build from. It is declared before the
# first FROM, so every stage that needs it re-declares it with `ARG BRANCH`.
ARG BRANCH=main

###################################################
# Stage 1 - docker container to build ensembl-vep #
###################################################
# `AS` is written in upper case to match `FROM` (BuildKit "FromAsCasing" check).
FROM ubuntu:22.04 AS builder

# Update aptitude and install some required packages
# a lot of them are required for Bio::DB::BigFile
# DEBIAN_FRONTEND is set inline (not via ENV) so that package configuration
# never waits for input during the build and the setting does not persist
# in the image environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install \
    build-essential \
    git \
    libpng-dev \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    perl \
    perl-base \
    unzip \
    wget && \
    rm -rf /var/lib/apt/lists/*
# Setup VEP environment
# Uses the `ENV key=value` form (the space-separated form is deprecated).
# One ENV instruction per variable is required here: each value references the
# variable defined by the instruction just above it.
ENV OPT=/opt/vep
ENV OPT_SRC=$OPT/src
ENV HTSLIB_DIR=$OPT_SRC/htslib
# Re-declare the global build argument so that it is visible inside this stage
ARG BRANCH

# Working directory
WORKDIR $OPT_SRC
# Clone/download repositories/libraries
# Everything below runs as a single layer inside $OPT_SRC: sources are fetched,
# then trimmed down to what the later build steps use.
# When BRANCH is "main" no branch option is given to `git clone`; otherwise
# "-b $BRANCH" is used. $BRANCH_OPT is expanded unquoted, so it yields either
# no argument at all or the two words `-b <branch>`.
RUN if [ "$BRANCH" = "main" ]; \
then export BRANCH_OPT=""; \
else export BRANCH_OPT="-b $BRANCH"; \
fi && \
# Get ensembl cpanfile in order to get the list of the required Perl libraries
# (saved as "ensembl_cpanfile"; it is consumed by cpanm in the second stage)
wget -q "https://raw.githubusercontent.com/Ensembl/ensembl/$BRANCH/cpanfile" -O "ensembl_cpanfile" && \
# Clone ensembl-vep git repository and make its top-level *.pl scripts executable
git clone $BRANCH_OPT --depth 1 https://github.com/Ensembl/ensembl-vep.git && chmod u+x ensembl-vep/*.pl && \
# Clone ensembl-variation git repository and keep only its C code (*.c + Makefile)
# in var_c_code; the compilation itself happens in a later RUN of this stage
git clone $BRANCH_OPT --depth 1 https://github.com/Ensembl/ensembl-variation.git && \
mkdir var_c_code && \
cp ensembl-variation/C_code/*.c ensembl-variation/C_code/Makefile var_c_code/ && \
rm -rf ensembl-variation && \
chmod u+x var_c_code/* && \
# Clone bioperl-ext git repository - used by Haplosaurus
git clone --depth 1 https://github.com/bioperl/bioperl-ext.git && \
# Download ensembl-xs - it contains compiled versions of certain key subroutines used in VEP
# (release archive 2.3.2, unpacked and renamed to "ensembl-xs")
wget https://github.com/Ensembl/ensembl-xs/archive/2.3.2.zip -O ensembl-xs.zip && \
unzip -q ensembl-xs.zip && mv ensembl-xs-2.3.2 ensembl-xs && rm -rf ensembl-xs.zip && \
# Clone/Download other repositories: bioperl-live is needed so the cpanm dependencies installation from the ensembl-vep/cpanfile file takes less disk space
# NOTE(review): presumably this script is what fetches bioperl-live, Bio-HTS,
# htslib and kent-335_base, which the commands below rely on - confirm against the script
ensembl-vep/travisci/get_dependencies.sh && \
# Only keep the bioperl-live "Bio" library
mv bioperl-live bioperl-live_bak && mkdir bioperl-live && mv bioperl-live_bak/Bio bioperl-live/ && rm -rf bioperl-live_bak && \
# A lot of cleanup on the imported libraries (hidden files, docs, tests, CI
# scripts), in order to reduce the size of the docker image
rm -rf Bio-HTS/.??* Bio-HTS/Changes Bio-HTS/DISCLAIMER Bio-HTS/MANIFEST* Bio-HTS/README Bio-HTS/scripts Bio-HTS/t Bio-HTS/travisci \
bioperl-ext/.??* bioperl-ext/Bio/SeqIO bioperl-ext/Bio/Tools bioperl-ext/Makefile.PL bioperl-ext/README* bioperl-ext/t bioperl-ext/examples \
ensembl-vep/.??* ensembl-vep/docker \
ensembl-xs/.??* ensembl-xs/TODO ensembl-xs/Changes ensembl-xs/INSTALL ensembl-xs/MANIFEST ensembl-xs/README ensembl-xs/t ensembl-xs/travisci \
htslib/.??* htslib/INSTALL htslib/NEWS htslib/README* htslib/test && \
# Only keep needed kent-335_base libraries for VEP - used by Bio::DB::BigFile (bigWig parsing)
# Kept: src/lib, src/inc, src/jkOwnLib and the src/*.sh scripts; the rest is deleted
mv kent-335_base kent-335_base_bak && mkdir -p kent-335_base/src && \
cp -R kent-335_base_bak/src/lib kent-335_base_bak/src/inc kent-335_base_bak/src/jkOwnLib kent-335_base/src/ && \
cp kent-335_base_bak/src/*.sh kent-335_base/src/ && \
rm -rf kent-335_base_bak
# Setup bioperl-ext
# The path is absolute so that it does not depend on the WORKDIR set earlier;
# it is the same path the final stage uses to install bioperl-ext.
WORKDIR $OPT_SRC/bioperl-ext/Bio/Ext/Align/
# Patch Makefile.PL in place: on the line that contains "cd libs", insert
# "-fPIC " right after the escaped opening quote of CFLAGS (CFLAGS=\').
RUN perl -pi -e"s|(cd libs.+)CFLAGS=\\\'|\$1CFLAGS=\\\'-fPIC |" Makefile.PL

# Install htslib binaries (for 'bgzip' and 'tabix')
# htslib requires the packages 'zlib1g-dev', 'libbz2-dev' and 'liblzma-dev'
WORKDIR $HTSLIB_DIR
# The Makefile and C sources are removed once the build is done
RUN make install && rm -f Makefile *.c

# Compile Variation LD C scripts
WORKDIR $OPT_SRC/var_c_code
# Same clean-up as above: only the build results are kept
RUN make && rm -f Makefile *.c
###################################################
# Stage 2 - docker container to build ensembl-vep #
###################################################
FROM ubuntu:22.04

# Update aptitude and install some required packages
# a lot of them are required for Bio::DB::BigFile
# DEBIAN_FRONTEND is set inline (not via ENV) so that package configuration
# (e.g. for 'locales') never waits for input during the build and the setting
# does not persist in the image environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install \
    build-essential \
    cpanminus \
    curl \
    libmysqlclient-dev \
    libdbd-mysql-perl \
    libpng-dev \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    liblzma-dev \
    locales \
    openssl \
    perl \
    perl-base \
    unzip \
    vim && \
    # Drop the development man pages and the apt caches in the same layer
    apt-get -y purge manpages-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Setup VEP environment
# Uses the `ENV key=value` form (the space-separated form is deprecated).
# Instructions are kept separate because later values reference earlier ones.
ENV OPT=/opt/vep
ENV OPT_SRC=$OPT/src
# PERL5LIB_TMP holds the library path without bioperl-live; PERL5LIB adds
# bioperl-live on top of it.
# NOTE(review): PERL5LIB is not set earlier in this file, so unless the base
# image defines it the resulting value starts with ':' - confirm this is intended.
ENV PERL5LIB_TMP=$PERL5LIB:$OPT_SRC/ensembl-vep:$OPT_SRC/ensembl-vep/modules
ENV PERL5LIB=$PERL5LIB_TMP:$OPT_SRC/bioperl-live
ENV KENT_SRC=$OPT/src/kent-335_base/src
ENV HTSLIB_DIR=$OPT_SRC/htslib
ENV DEPS=$OPT_SRC
# Put the VEP scripts and the compiled variation C tools on the PATH
ENV PATH=$OPT_SRC/ensembl-vep:$OPT_SRC/var_c_code:$PATH
# Locale name; it is applied to LANG/LC_ALL and generated further down
ENV LANG_VAR=en_US.UTF-8
# Re-declare the global build argument so that it is visible inside this stage
ARG BRANCH
# Create the "vep" system account: home directory $OPT (created), a group of
# the same name, bash as login shell and an empty password field. The home is
# made readable/traversable by everybody, the account is added to the "sudo"
# group and the source directory is created.
RUN useradd --system --create-home --user-group \
        --home-dir "$OPT" \
        --shell /bin/bash \
        --comment "VEP User" \
        --password '' \
        vep \
 && chmod a+rx "$OPT" \
 && usermod --append --groups sudo vep \
 && mkdir -p "$OPT_SRC"
USER vep

# Bring the libraries downloaded/compiled in stage 1 into this image,
# owned by the vep user
COPY --chown=vep:vep --from=builder $OPT_SRC $OPT_SRC

#############################################################
# The compilations/installations that follow need root privileges
USER root
# bioperl-ext: XS-based BioPerl extensions to C libraries, giving faster
# alignments for haplo. Build, install, then drop the generated Makefile(s).
WORKDIR $OPT_SRC/bioperl-ext/Bio/Ext/Align/
RUN perl Makefile.PL \
 && make \
 && make install \
 && rm -f Makefile*

# ensembl-xs: C re-implementation of some of the Perl subroutines, for a
# faster run. Same sequence; the cpanfile is removed as well.
WORKDIR $OPT_SRC/ensembl-xs
RUN perl Makefile.PL \
 && make \
 && make install \
 && rm -f Makefile* cpanfile
# All paths in the RUN below are relative to the source directory
WORKDIR $OPT_SRC
# Install/compile more libraries
# Runs as root in a single layer: C builds, Perl dependencies, locale setup,
# htslib executables and clean-up.
# MACHTYPE is exported with the machine hardware name before build_c.sh runs.
# NOTE(review): presumably build_c.sh reads MACHTYPE (and KENT_SRC, HTSLIB_DIR,
# DEPS set above) to build the kent library and Bio-HTS - confirm against the script
RUN export MACHTYPE=$(uname -m) &&\
ensembl-vep/travisci/build_c.sh && \
# Remove unused Bio-DB-HTS files
rm -rf Bio-HTS/cpanfile Bio-HTS/Build.PL Bio-HTS/Build Bio-HTS/_build Bio-HTS/INSTALL.pl && \
# Install ensembl perl dependencies (cpanm)
# First the dependencies from the ensembl cpanfile fetched in stage 1, then the
# ones from the ensembl-vep cpanfile; tests are skipped (--notest)
cpanm --installdeps --with-recommends --notest --cpanfile ensembl_cpanfile . && \
cpanm --installdeps --with-recommends --notest --cpanfile ensembl-vep/cpanfile . && \
# Delete bioperl and cpanfiles after the cpanm installs as bioperl will be reinstalled by the INSTALL.pl script
rm -rf bioperl-live ensembl_cpanfile ensembl-vep/cpanfile && \
# Configure "locale", see https://github.com/rocker-org/rocker/issues/19
# (register $LANG_VAR in /etc/locale.gen, generate it, make it the default LANG)
echo "$LANG_VAR UTF-8" >> /etc/locale.gen && locale-gen en_US.utf8 && \
/usr/sbin/update-locale LANG=$LANG_VAR && \
# Copy htslib executables. It also requires the packages 'zlib1g-dev', 'libbz2-dev' and 'liblzma-dev'
cp $HTSLIB_DIR/bgzip $HTSLIB_DIR/tabix $HTSLIB_DIR/htsfile /usr/local/bin/ && \
# Remove CPAN cache
rm -rf /root/.cpanm
# Apply the locale name held in LANG_VAR
# (`ENV key=value` form; the space-separated form is deprecated)
ENV LC_ALL=$LANG_VAR
ENV LANG=$LANG_VAR

# Switch back to vep user
USER vep
# Reset PERL5LIB to the value without the bioperl-live entry
ENV PERL5LIB=$PERL5LIB_TMP

# Setup Docker environment for when users run VEP and INSTALL.pl in Docker image:
# - skip VEP updates in INSTALL.pl
ENV VEP_NO_UPDATE=1
# - avoid Faidx/HTSLIB installation in INSTALL.pl
ENV VEP_NO_HTSLIB=1
# - skip plugin installation in INSTALL.pl
ENV VEP_NO_PLUGINS=1
# - set plugins directory for VEP and INSTALL.pl
ENV VEP_DIR_PLUGINS=/plugins
# Kept as a separate instruction because it references VEP_DIR_PLUGINS
ENV VEP_PLUGINSDIR=$VEP_DIR_PLUGINS
# WORKDIR creates the plugins directory if it does not exist yet
WORKDIR $VEP_DIR_PLUGINS
# Extend the vep user's profile and install the Ensembl API
WORKDIR $OPT_SRC/ensembl-vep
# The three lines appended to $OPT/.profile are: an empty line, a PATH
# assignment and "export PATH". In the assignment the first $PATH is expanded
# now (build time), while \$PATH is written literally and expanded at login.
RUN { echo && \
      echo PATH=$PATH:\$PATH && \
      echo export PATH; \
    } >> $OPT/.profile && \
    # Install Ensembl API and plugins
    ./INSTALL.pl \
        --auto ap \
        --plugins all \
        --pluginsdir $VEP_DIR_PLUGINS \
        --no_update \
        --no_htslib && \
    # Remove the ensembl-vep tests and travis files
    rm -rf t travisci .travis.yml
# Install dependencies for VEP plugins:
USER root
# Base URL of the VEP_plugins configuration files for the selected BRANCH
ENV PLUGIN_DEPS="https://raw.githubusercontent.com/Ensembl/VEP_plugins/$BRANCH/config"

# Every download below uses `curl -fsSL`: with --fail an HTTP error (e.g. a
# BRANCH that does not exist in VEP_plugins) stops the build at the download,
# instead of saving the error page and failing later with a confusing message.

# - Ubuntu packages
#   The package list is the file content with '#' comments stripped.
#   DEBIAN_FRONTEND is set inline so that no package can wait for input.
RUN curl -fsSL -O "$PLUGIN_DEPS/ubuntu-packages.txt" && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        $(sed -e 's/#.*//g' ubuntu-packages.txt) && \
    rm -rf /var/lib/apt/lists/* ubuntu-packages.txt

# - Symlink python to python2
RUN ln -s /usr/bin/python2 /usr/bin/python

# - Perl modules
RUN curl -fsSL -O "$PLUGIN_DEPS/cpanfile" && \
    cpanm --installdeps --with-recommends . && \
    rm -rf /root/.cpanm cpanfile

# - Python packages
#   my_config.h is placed among the MySQL headers first.
#   NOTE(review): presumably one of the Python requirements needs this header
#   to compile; it is fetched from the moving 'master' branch of a third-party
#   repository - consider pinning it to a commit.
RUN curl -fsSL -o /usr/include/mysql/my_config.h \
        https://raw.githubusercontent.com/paulfitz/mysql-connector-c/master/include/my_config.h
RUN curl -fsSL -O "$PLUGIN_DEPS/requirements.txt" && \
    python2 -m pip install --no-cache-dir -r requirements.txt && \
    rm requirements.txt
# Install GeneSplicer binary
USER vep
WORKDIR $VEP_DIR_PLUGINS
# Download and unpack the GeneSplicer archive, build it from its "sources"
# directory and move the resulting binary one level up, into GeneSplicer/.
# NOTE(review): the final `rm -rf GeneSplicer/*/` runs after
# `cd GeneSplicer/sources`, so its glob is resolved relative to that directory
# and most likely matches nothing (rm -f then succeeds silently), i.e. the
# sub-directories of GeneSplicer are probably NOT removed. The command is left
# unchanged on purpose: making it effective would delete every sub-directory of
# GeneSplicer, which may contain data that users of the image rely on - confirm
# the intent before changing it.
RUN curl -O ftp://ftp.ccb.jhu.edu/pub/software/genesplicer/GeneSplicer.tar.gz && \
    tar -xzf GeneSplicer.tar.gz && \
    rm GeneSplicer.tar.gz && \
    cd GeneSplicer/sources && \
    make && \
    mv genesplicer .. && \
    rm -rf GeneSplicer/*/
# Make the genesplicer binary available on the PATH
# (`ENV key=value` form; the space-separated form is deprecated)
ENV PATH=$VEP_DIR_PLUGINS/GeneSplicer:$PATH
# /data is a symbolic link to $OPT/.vep (containing VEP cache and data) and is
# the working directory of the image. Creating the link in / requires root;
# the image then switches back to the vep user.
USER root
RUN ln --symbolic "${OPT}/.vep" /data
USER vep
WORKDIR /data