Commit f122de4

Merge branch 'RodgerZhu-branch-dev/vfl-fedlearner' into branch-dev/vfl-fedlearner

2 parents f69efaa + 6cbc8fe, commit f122de4

39 files changed (+592, -82 lines)

.github/workflows/build-hfl-docker-image.yml

Lines changed: 2 additions & 2 deletions

@@ -1,8 +1,8 @@
 name: HFL-TensorFlow Docker Image CI
 
 on:
-  pull_request:
-    branches: [ branch0.1/hfl-tensorflow ]
+  push:
+    branches: [ main ]
 
 jobs:

.github/workflows/build-main-docker-image.yml

Lines changed: 2 additions & 2 deletions

@@ -1,8 +1,8 @@
 name: Main Branch Docker Image CI
 
 on:
-  schedule:
-    - cron: '* 12 * * 6'
+  push:
+    branches: [ main ]
 
 jobs:
   test_schedule:

.gitmodules

Lines changed: 3 additions & 0 deletions

@@ -4,3 +4,6 @@
 [submodule "cczoo/rats-tls/rats-tls"]
 	path = cczoo/rats-tls/rats-tls
 	url = https://github.com/inclavare-containers/rats-tls.git
+[submodule "cczoo/vertical_fl/vertical_fl"]
+	path = cczoo/vertical_fl/vertical_fl
+	url = https://github.com/bytedance/fedlearner.git
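
On an existing checkout, the new submodule entry can be fetched with standard git commands (the path comes from the entry above):

```
git submodule sync
git submodule update --init cczoo/vertical_fl/vertical_fl
```
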
Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+FROM ubuntu:18.04
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+# Add steps here to set up dependencies
+RUN apt-get update \
+    && apt-get install -y \
+        autoconf \
+        bison \
+        build-essential \
+        coreutils \
+        gawk \
+        git \
+        golang \
+        libcurl4-openssl-dev \
+        libgl1-mesa-glx \
+        libprotobuf-c-dev \
+        protobuf-c-compiler \
+        python3.7 \
+        python3-protobuf \
+        python3-pip \
+        python3-dev \
+        python3-click \
+        python3-jinja2 \
+        libnss-mdns \
+        libnss-myhostname \
+        libcurl4-openssl-dev \
+        libprotobuf-c-dev \
+        ninja-build \
+        wget \
+        curl \
+    && apt-get install -y --no-install-recommends apt-utils
+
+RUN pip3 install --upgrade pip
+RUN pip install django-model-utils
+
+RUN mkdir client
+RUN mkdir -p client/ssl_configure
+COPY requirements.txt client/
+RUN pip3 install -r client/requirements.txt
+COPY resnet_client_grpc.py client/
+COPY utils.py client/
+COPY ssl_configure client/ssl_configure
+
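
As a quick smoke test, this new client image can be built and entered roughly as below; the dockerfile name `client.dockerfile` and the image tag are placeholders (the actual file path is not shown in this view), and the build context must contain `requirements.txt`, `resnet_client_grpc.py`, `utils.py`, and `ssl_configure/`.

```
# Hypothetical build/run of the client image; file name and tag are placeholders.
docker build -f client.dockerfile -t tf-serving-client:latest .
docker run -it tf-serving-client:latest bash
```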

cczoo/tensorflow-serving-cluster/tensorflow-serving/docker/secret_prov/run_secret_prov.sh

Lines changed: 14 additions & 4 deletions

@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+
 #
 # Copyright (c) 2021 Intel Corporation
 #
@@ -13,17 +15,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#!/usr/bin/env bash
 set -e
 
-
 function usage_help() {
     echo -e "options:"
     echo -e " -h Display help"
     echo -e " -i {image_id}"
+    echo -e " -a {pccs_service_com}"
 }
 
-while getopts "h?i:" OPT; do
+pccs_service_com="localhost:127.0.0.1"
+
+while getopts "h?i:a:" OPT; do
     case $OPT in
     h|\?)
         usage_help
@@ -33,6 +36,10 @@ while getopts "h?i:" OPT; do
        echo -e "Option $OPTIND, image_id = $OPTARG"
        image_id=$OPTARG
        ;;
+    a)
+        echo -e "Option $OPTIND, pccs_service_com = $OPTARG"
+        pccs_service_com=$OPTARG
+        ;;
    ?)
        echo -e "Unknown option $OPTARG"
        usage_help
@@ -41,4 +48,7 @@ while getopts "h?i:" OPT; do
    esac
 done
 
-docker run -itd -p 4433:4433 -v /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket ${image_id}
+docker run -itd -p 4433:4433 \
+    -v /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket \
+    --add-host=${pccs_service_com} \
+    ${image_id}
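
With the new `-a` option, the PCCS host mapping is injected via Docker's `--add-host` host:IP syntax. A hypothetical invocation (image tag and IP are placeholders):

```
# Placeholders: substitute your secret provisioning image and the PCCS host IP.
./run_secret_prov.sh -i secret_prov_server:latest -a pccs.service.com:XXX.XXX.XXX.XXX
```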

cczoo/tensorflow-serving-cluster/tensorflow-serving/docker/secret_prov/secret_prov.dockerfile

Lines changed: 0 additions & 2 deletions

@@ -96,8 +96,6 @@ RUN cd ${GRAMINEDIR}/CI-Examples/ra-tls-secret-prov \
 
 COPY certs/server2-sha256.crt ${GRAMINEDIR}/CI-Examples/ra-tls-secret-prov/certs
 
-# Please replace pccs_host_machin_id with real IP address
-RUN echo "pccs_host_machin_id attestation.service.com" > /etc/hosts
 COPY sgx_default_qcnl.conf /etc/
 COPY entrypoint_secret_prov_server.sh /usr/bin/
 RUN chmod +x /usr/bin/entrypoint_secret_prov_server.sh
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 # PCCS server address
-PCCS_URL=https://attestation.service.com:8081/sgx/certification/v3/
+PCCS_URL=https://pccs.service.com:8081/sgx/certification/v3/
 # # To accept insecure HTTPS cert, set this option to FALSE
 USE_SECURE_CERT=FALSE

cczoo/tensorflow-serving-cluster/tensorflow-serving/docker/tf_serving/run_gramine_tf_serving.sh

Lines changed: 2 additions & 2 deletions

@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+
 #
 # Copyright (c) 2021 Intel Corporation
 #
@@ -13,8 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#!/usr/bin/env bash
-
 set -e
 
 function usage_help() {

cczoo/vertical_fl/README.md

Lines changed: 199 additions & 0 deletions

## Prerequisites

- Ubuntu 18.04. This solution should also work on other Linux distributions, but for simplicity we provide the steps for Ubuntu 18.04 only.

- Docker Engine. Docker Engine is an open-source containerization technology for building and containerizing your applications. In this solution, Gramine, Fedlearner, and gRPC are built into Docker images. Please follow [this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-convenience-script) to install Docker Engine (see the sketch after this list).

- SGX-capable platform with the Intel SGX driver and SDK/PSW. You need a machine that supports Intel SGX and FLC/DCAP. Please follow [this guide](https://download.01.org/intel-sgx/latest/linux-latest/docs/) to install the Intel SGX driver and SDK/PSW. One way to verify the SGX enabling status of your machine is to run [QuoteGeneration](https://github.com/intel/SGXDataCenterAttestationPrimitives/blob/master/QuoteGeneration) and [QuoteVerification](https://github.com/intel/SGXDataCenterAttestationPrimitives/blob/master/QuoteVerification) successfully.
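
The Docker Engine installation referenced in the list above can be done with Docker's convenience script; a minimal sketch, assuming an Ubuntu host with `curl` available:

```
# Install Docker Engine via the convenience script, then run a quick sanity check.
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo docker run --rm hello-world
```
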
Here, we will demonstrate how to run the leader and the follower from two containers.

## Executing Fedlearner in SGX

### 1. Download source code

```
git clone -b fix_dev_sgx https://github.com/bytedance/fedlearner.git
cd fedlearner
git submodule init
git submodule update
```

### 2. Build Docker image

```
img_tag=Your_defined_tag
./sgx/build_dev_docker_image.sh ${img_tag}
```

*Note:* `build_dev_docker_image.sh` provides the parameter `proxy_server` to help you set your network proxy. It can be removed from the script if it is not needed.

You will get the built image:

```
REPOSITORY           TAG       IMAGE ID       CREATED        SIZE
fedlearner-sgx-dev   latest    8c3c7a05f973   45 hours ago   15.2GB
```

### 3. Start the containers

In terminal 1, start a container to run the leader:

```
docker run -it \
    --name=fedlearner_leader \
    --restart=unless-stopped \
    -p 50051:50051 \
    --device=/dev/sgx_enclave:/dev/sgx/enclave \
    --device=/dev/sgx_provision:/dev/sgx/provision \
    fedlearner-sgx-dev:latest \
    bash
```

In terminal 2, start a container to run the follower:

```
docker run -it \
    --name=fedlearner_follower \
    --restart=unless-stopped \
    -p 50052:50052 \
    --device=/dev/sgx_enclave:/dev/sgx/enclave \
    --device=/dev/sgx_provision:/dev/sgx/provision \
    fedlearner-sgx-dev:latest \
    bash
```

#### 3.1 Configure PCCS

- If you are using a public cloud instance, replace the PCCS URL in `/etc/sgx_default_qcnl.conf` with the PCCS URL provided by the cloud:

```
Old: PCCS_URL=https://pccs.service.com:8081/sgx/certification/v3/
New: PCCS_URL=https://public_cloud_pccs_url
```

- If you are using your own machine, make sure the PCCS service is running successfully on your host (check with `systemctl status pccs`), and add your host IP address to `/etc/hosts` inside each container, for example (see also the sketch after this list):

```
cat /etc/hosts
XXX.XXX.XXX.XXX pccs.service.com   # XXX.XXX.XXX.XXX is the host IP
```
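
For the own-machine case above, both steps can be applied inside each container with a couple of commands; a minimal sketch, assuming `pccs.service.com` is the hostname already configured in `/etc/sgx_default_qcnl.conf` and the placeholders are replaced with real values:

```
# Run inside the leader and the follower container; placeholders must be replaced.
host_ip=XXX.XXX.XXX.XXX   # IP of the host running the PCCS service
echo "${host_ip} pccs.service.com" >> /etc/hosts
# Public-cloud case instead: point the quoting library at the cloud's PCCS URL.
# sed -i 's|^PCCS_URL=.*|PCCS_URL=https://public_cloud_pccs_url|' /etc/sgx_default_qcnl.conf
```
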
#### 3.2 Start the AESM service

Execute the script below in both the leader and the follower container:

```
/root/start_aesm_service.sh
```
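
To confirm the service came up, a quick generic check (not part of the original scripts) can be run in either container:

```
# The AESM daemon should be running and its socket present.
ps -ef | grep -i aesm | grep -v grep
ls -l /var/run/aesmd/aesm.socket
```
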
### 4. Prepare data

Generate data in both the leader and the follower container:

```
cd /gramine/CI-Examples/wide_n_deep
./test-ps-sgx.sh data
```

### 5. Compile applications

Compile the applications in both the leader and the follower container:

```
cd /gramine/CI-Examples/wide_n_deep
./test-ps-sgx.sh make
```

Find `mr_enclave` and `mr_signer` in the build log, as shown below:

```
+ make
+ grep 'mr_enclave\|mr_signer\|isv_prod_id\|isv_svn'
isv_prod_id: 0
isv_svn: 0
mr_enclave: bda462c6483a15f18c92bbfd0acbb61b9702344202fcc6ceed194af00a00fc02
mr_signer: dbf7a340bbed6c18345c6d202723364765d261fdb04e960deb4ca894d4274839
isv_prod_id: 0
isv_svn: 0
```

Then, update the leader's `dynamic_config.json` under the current folder with the follower's `mr_enclave` and `mr_signer`. Likewise, update the follower's `dynamic_config.json` with the leader's `mr_enclave` and `mr_signer` (a scripted way to do this is sketched after the example below).

```
dynamic_config.json:
{
    ......
    "sgx_mrs": [
        {
            "mr_enclave": "",
            "mr_signer": "",
            "isv_prod_id": "0",
            "isv_svn": "0"
        }
    ],
    ......
}
```
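
As referenced above, one scripted way to drop the peer's measurements into `dynamic_config.json` is sketched below; it assumes `jq` is available in the container and that the two values have already been copied from the peer's build log (the values shown are the sample ones printed above):

```
# Hypothetical helper: patch the first entry of "sgx_mrs" with the peer's values.
PEER_MR_ENCLAVE=bda462c6483a15f18c92bbfd0acbb61b9702344202fcc6ceed194af00a00fc02
PEER_MR_SIGNER=dbf7a340bbed6c18345c6d202723364765d261fdb04e960deb4ca894d4274839
jq --arg e "$PEER_MR_ENCLAVE" --arg s "$PEER_MR_SIGNER" \
   '.sgx_mrs[0].mr_enclave = $e | .sgx_mrs[0].mr_signer = $s' \
   dynamic_config.json > dynamic_config.json.tmp \
   && mv dynamic_config.json.tmp dynamic_config.json
```
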
### 6. Configure the leader's and follower's IP

In the leader's `test-ps-sgx.sh`, replace `localhost` in `--peer-addr` with `follower_container_ip`:

```
elif [ "$ROLE" == "leader" ]; then
    make_custom_env
    rm -rf model/leader
    ......
    taskset -c 4-7 stdbuf -o0 gramine-sgx python -u leader.py \
        --local-addr=localhost:50051 \
        --peer-addr=follower_container_ip:50052
```

In the follower's `test-ps-sgx.sh`, replace `localhost` in `--peer-addr` with `leader_container_ip`:

```
elif [ "$ROLE" == "follower" ]; then
    make_custom_env
    rm -rf model/follower
    ......
    taskset -c 12-15 stdbuf -o0 gramine-sgx python -u follower.py \
        --local-addr=localhost:50052 \
        --peer-addr=leader_container_ip:50051
```

*Note:* Get a container's IP address from your host:

```
docker inspect --format '{{ .NetworkSettings.IPAddress }}' container_id
```
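
Both substitutions can also be driven from the host in one go; a minimal sketch, assuming the container names used in step 3, the script path used in the previous steps, and that the script still contains the `localhost` peer addresses:

```
# Hypothetical host-side helper: look up both container IPs, then patch each
# side's test-ps-sgx.sh so --peer-addr points at the other container.
leader_ip=$(docker inspect --format '{{ .NetworkSettings.IPAddress }}' fedlearner_leader)
follower_ip=$(docker inspect --format '{{ .NetworkSettings.IPAddress }}' fedlearner_follower)
docker exec fedlearner_leader \
    sed -i "s|--peer-addr=localhost:50052|--peer-addr=${follower_ip}:50052|" \
    /gramine/CI-Examples/wide_n_deep/test-ps-sgx.sh
docker exec fedlearner_follower \
    sed -i "s|--peer-addr=localhost:50051|--peer-addr=${leader_ip}:50051|" \
    /gramine/CI-Examples/wide_n_deep/test-ps-sgx.sh
```
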
### 7. Run the distributed training

In the leader container:

```
cd /gramine/CI-Examples/wide_n_deep
./test-ps-sgx.sh leader
```

In the follower container:

```
cd /gramine/CI-Examples/wide_n_deep
./test-ps-sgx.sh follower
```

Finally, the model files will be placed at:

```
./model/leader/id/save_model.pd
```

```
./model/follower/id/save_model.pd
```

cczoo/vertical_fl/vertical_fl

Submodule vertical_fl added at dece6ef
