Skip to content

Commit 1bc3aa6

Browse files
karllessard and saudet authored
Fix native builds on GitHub Actions using Bazel cache (#240)
* Dump GCP credentials to file
* Pass BAZEL_EXTRA_OPTS environment variable
* Pass native extra build flags to Maven
* Reactivate windows GPU builds
* Remove more stuff from CUDA to avoid running out of disk space (#239)
* Fix archive cleanup paths
* Include Windows in GPU platform

Co-authored-by: Samuel Audet <samuel.audet@gmail.com>
1 parent c6d7c25 commit 1bc3aa6

File tree

5 files changed

+65
-29
lines changed

5 files changed

+65
-29
lines changed

.github/workflows/ci.yml

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
env:
1212
STAGING_PROFILE_ID: 46f80d0729c92d
1313
NATIVE_BUILD_PROJECTS: tensorflow-core/tensorflow-core-generator,tensorflow-core/tensorflow-core-api
14+
GCP_CREDS: ${{ secrets.GCP_CREDS }}
1415
jobs:
1516
quick-build:
1617
if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'CI build')
@@ -76,8 +77,9 @@ jobs:
7677
tar hxvf $HOME/nccl.txz --strip-components=1 -C /usr/local/cuda/
7778
mv /usr/local/cuda/lib/* /usr/local/cuda/lib64/
7879
echo Removing downloaded archives and unused libraries to avoid running out of disk space
79-
rm -f *.rpm *.tgz *.txz *.tar.*
80+
rm -f $HOME/*.rpm $HOME/*.tgz $HOME/*.txz $HOME/*.tar.*
8081
rm -f $(find /usr/local/cuda/ -name '*.a' -and -not -name libcudart_static.a -and -not -name libcudadevrt.a)
82+
rm -rf /usr/local/cuda/doc* /usr/local/cuda/libnvvp* /usr/local/cuda/nsight* /usr/local/cuda/samples*
8183
fi
8284
- name: Checkout repository
8385
uses: actions/checkout@v1
@@ -94,8 +96,14 @@ jobs:
9496
mkdir -p $HOME/.m2
9597
[[ "${{ github.event_name }}" == "push" ]] && MAVEN_PHASE=deploy || MAVEN_PHASE=install
9698
echo "<settings><servers><server><id>ossrh</id><username>${{ secrets.CI_DEPLOY_USERNAME }}</username><password>${{ secrets.CI_DEPLOY_PASSWORD }}</password></server></servers></settings>" > $HOME/.m2/settings.xml
99+
if [[ "${{ github.event_name }}" == "push" && "${{ github.repository }}" == "tensorflow/java" ]]; then
100+
printf '%s\n' "${GCP_CREDS}" > $HOME/gcp_creds.json
101+
export BAZEL_CACHE="--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=true --google_credentials=$HOME/gcp_creds.json"
102+
else
103+
export BAZEL_CACHE="--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=false"
104+
fi
97105
echo Executing Maven $MAVEN_PHASE
98-
mvn clean $MAVEN_PHASE -B -U -e -Djavacpp.platform=linux-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl $NATIVE_BUILD_PROJECTS -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }}
106+
mvn clean $MAVEN_PHASE -B -U -e -Djavacpp.platform=linux-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl $NATIVE_BUILD_PROJECTS -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }} "-Dnative.build.flags=$BAZEL_CACHE"
99107
df -h
100108
macosx-x86_64:
101109
if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'CI build')
@@ -123,17 +131,23 @@ jobs:
123131
mkdir -p $HOME/.m2
124132
[[ "${{ github.event_name }}" == "push" ]] && MAVEN_PHASE=deploy || MAVEN_PHASE=install
125133
echo "<settings><servers><server><id>ossrh</id><username>${{ secrets.CI_DEPLOY_USERNAME }}</username><password>${{ secrets.CI_DEPLOY_PASSWORD }}</password></server></servers></settings>" > $HOME/.m2/settings.xml
134+
if [[ "${{ github.event_name }}" == "push" && "${{ github.repository }}" == "tensorflow/java" ]]; then
135+
printf '%s\n' "${GCP_CREDS}" > $HOME/gcp_creds.json
136+
export BAZEL_CACHE="--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=true --google_credentials=$HOME/gcp_creds.json"
137+
else
138+
export BAZEL_CACHE="--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=false"
139+
fi
126140
df -h
127141
echo Executing Maven $MAVEN_PHASE
128-
mvn clean $MAVEN_PHASE -B -U -e -Djavacpp.platform=macosx-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl $NATIVE_BUILD_PROJECTS -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }}
142+
mvn clean $MAVEN_PHASE -B -U -e -Djavacpp.platform=macosx-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl $NATIVE_BUILD_PROJECTS -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }} "-Dnative.build.flags=$BAZEL_CACHE"
129143
df -h
130144
windows-x86_64:
131145
if: github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'CI build')
132146
runs-on: windows-latest
133147
needs: prepare
134148
strategy:
135149
matrix:
136-
ext: ["", -mkl] # -gpu, -mkl-gpu]
150+
ext: ["", -gpu, -mkl] #, -mkl-gpu]
137151
steps:
138152
- name: Configure page file
139153
uses: al-cheb/configure-pagefile-action@v1.2
@@ -154,16 +168,16 @@ jobs:
154168
mkdir C:\bazel
155169
curl.exe -L https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-windows-x86_64.exe -o C:/bazel/bazel.exe --retry 10
156170
set "EXT=${{ matrix.ext }}"
157-
if "%EXT:~-4%"=="-gpu" (
158-
echo Removing some unused stuff to avoid running out of disk space
159-
rm.exe -Rf "C:/Program Files (x86)/Android" "C:/Program Files/dotnet" "%CONDA%" "%GOROOT_1_10_X64%" "%GOROOT_1_11_X64%" "%GOROOT_1_12_X64%" "%GOROOT_1_13_X64%" "C:\hostedtoolcache\windows\Ruby" "C:\Rust"
160-
echo Installing CUDA
161-
curl.exe -L https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_451.82_win10.exe -o cuda.exe
162-
curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.0.3/cudnn-11.0-windows-x64-v8.0.3.33.zip -o cudnn.zip
163-
cuda.exe -s
164-
mkdir cuda
165-
unzip.exe cudnn.zip
166-
cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.0/"
171+
if "%EXT:~-4%" == "-gpu" (
172+
echo Removing some unused stuff to avoid running out of disk space
173+
rm.exe -Rf "C:/Program Files (x86)/Android" "C:/Program Files/dotnet" "%CONDA%" "%GOROOT_1_10_X64%" "%GOROOT_1_11_X64%" "%GOROOT_1_12_X64%" "%GOROOT_1_13_X64%" "C:\hostedtoolcache\windows\Ruby" "C:\Rust"
174+
echo Installing CUDA
175+
curl.exe -L https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda_11.0.3_451.82_win10.exe -o cuda.exe
176+
curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.0.3/cudnn-11.0-windows-x64-v8.0.3.33.zip -o cudnn.zip
177+
cuda.exe -s
178+
mkdir cuda
179+
unzip.exe cudnn.zip
180+
cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.0/"
167181
)
168182
echo %JAVA_HOME%
169183
- name: Checkout repository
@@ -187,12 +201,19 @@ jobs:
187201
call mvn -version
188202
bazel version
189203
mkdir %USERPROFILE%\.m2
190-
if "${{ github.event_name }}"=="push" (set MAVEN_PHASE=deploy) else (set MAVEN_PHASE=install)
204+
if "${{ github.event_name }}" == "push" (set MAVEN_PHASE=deploy) else (set MAVEN_PHASE=install)
191205
echo ^<settings^>^<servers^>^<server^>^<id^>ossrh^</id^>^<username^>${{ secrets.CI_DEPLOY_USERNAME }}^</username^>^<password^>${{ secrets.CI_DEPLOY_PASSWORD }}^</password^>^</server^>^</servers^>^</settings^> > %USERPROFILE%\.m2\settings.xml
206+
set "BAZEL_CACHE=--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=false"
207+
if "${{ github.event_name }}" == "push" (
208+
if "${{ github.repository }}" == "tensorflow/java" (
209+
printenv GCP_CREDS > %USERPROFILE%\gcp_creds.json
210+
set "BAZEL_CACHE=--remote_cache=https://storage.googleapis.com/tensorflow-sigs-jvm --remote_upload_local_results=true --google_credentials=%USERPROFILE%\gcp_creds.json"
211+
)
212+
)
192213
df -h
193214
wmic pagefile list /format:list
194215
echo Executing Maven %MAVEN_PHASE%
195-
call mvn clean %MAVEN_PHASE% -B -U -e -Djavacpp.platform=windows-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl %NATIVE_BUILD_PROJECTS% -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }}
216+
call mvn clean %MAVEN_PHASE% -B -U -e -Djavacpp.platform=windows-x86_64 -Djavacpp.platform.extension=${{ matrix.ext }} -pl %NATIVE_BUILD_PROJECTS% -am -DstagingRepositoryId=${{ needs.prepare.outputs.stagingRepositoryId }} "-Dnative.build.flags=%BAZEL_CACHE%"
196217
if ERRORLEVEL 1 exit /b
197218
df -h
198219
wmic pagefile list /format:list

tensorflow-core/tensorflow-core-api/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ BUILD_FLAGS="$BUILD_FLAGS --experimental_repo_remote_exec --python_path="$PYTHON
3333
BUILD_FLAGS="$BUILD_FLAGS --distinct_host_configuration=true"
3434

3535
# Build C/C++ API of TensorFlow itself including a target to generate ops for Java
36-
bazel build $BUILD_FLAGS \
36+
bazel build $BUILD_FLAGS $BUILD_EXTRA_FLAGS \
3737
@org_tensorflow//tensorflow:tensorflow_cc \
3838
@org_tensorflow//tensorflow/tools/lib_package:jnilicenses_generate \
3939
:java_proto_gen_sources \

tensorflow-core/tensorflow-core-api/external/tensorflow-windows.patch

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
diff -ruN tensorflow-1.14.0-rc1/third_party/mkl/mkl.BUILD tensorflow-1.14.0-rc1-windows/third_party/mkl/mkl.BUILD
2-
--- tensorflow-1.14.0-rc1/third_party/mkl/mkl.BUILD 2019-06-08 11:23:20.000000000 +0900
3-
+++ tensorflow-1.14.0-rc1-windows/third_party/mkl/mkl.BUILD 2019-06-12 08:30:41.232683854 +0900
4-
@@ -35,11 +35,23 @@
1+
diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD
2+
index aa65b585b85..4e6546eac34 100644
3+
--- a/third_party/mkl/BUILD
4+
+++ b/third_party/mkl/BUILD
5+
@@ -91,10 +91,23 @@ cc_library(
56
visibility = ["//visibility:public"],
67
)
78

@@ -20,11 +21,24 @@ diff -ruN tensorflow-1.14.0-rc1/third_party/mkl/mkl.BUILD tensorflow-1.14.0-rc1-
2021
cc_library(
2122
name = "mkl_libs_windows",
2223
- srcs = [
23-
- "lib/libiomp5md.lib",
24-
- "lib/mklml.lib",
24+
- "@llvm_openmp//:libiomp5md.dll",
2525
+ deps = [
2626
+ "iomp5",
27-
+ "mklml",
27+
+ "mklml"
2828
],
29-
linkopts = ["/FORCE:MULTIPLE"],
3029
visibility = ["//visibility:public"],
30+
)
31+
diff --git a/third_party/llvm_openmp/BUILD b/third_party/llvm_openmp/BUILD
32+
index 099a84dcbaa..f7f9d44118f 100644
33+
--- a/third_party/llvm_openmp/BUILD
34+
+++ b/third_party/llvm_openmp/BUILD
35+
@@ -71,7 +71,7 @@ omp_vars_linux = {
36+
37+
# Windows Cmake vars to expand.
38+
omp_vars_win = {
39+
- "MSVC": 1,
40+
+ "MSVC": 0,
41+
}
42+
43+
omp_all_cmake_vars = select({
44+

tensorflow-core/tensorflow-core-api/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@
260260
</buildCommand>
261261
<environmentVariables>
262262
<EXTENSION>${javacpp.platform.extension}</EXTENSION>
263+
<BUILD_EXTRA_FLAGS>${native.build.flags}</BUILD_EXTRA_FLAGS>
263264
</environmentVariables>
264265
<workingDirectory>${project.basedir}</workingDirectory>
265266
</configuration>

tensorflow-core/tensorflow-core-platform-gpu/pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,12 @@
4545
<version>${javacpp.version}</version>
4646
<classifier>${javacpp.platform.linux-x86_64}</classifier>
4747
</dependency>
48-
<!--dependency>
48+
<dependency>
4949
<groupId>org.bytedeco</groupId>
5050
<artifactId>javacpp</artifactId>
5151
<version>${javacpp.version}</version>
5252
<classifier>${javacpp.platform.windows-x86_64}</classifier>
53-
</dependency-->
53+
</dependency>
5454
<dependency>
5555
<groupId>${project.groupId}</groupId>
5656
<artifactId>${javacpp.moduleId}</artifactId>
@@ -62,12 +62,12 @@
6262
<version>${project.version}</version>
6363
<classifier>${javacpp.platform.linux-x86_64.extension}</classifier>
6464
</dependency>
65-
<!--dependency>
65+
<dependency>
6666
<groupId>${project.groupId}</groupId>
6767
<artifactId>${javacpp.moduleId}</artifactId>
6868
<version>${project.version}</version>
6969
<classifier>${javacpp.platform.windows-x86_64.extension}</classifier>
70-
</dependency-->
70+
</dependency>
7171
</dependencies>
7272

7373
<build>

0 commit comments

Comments
 (0)