
Merge pull request PaddlePaddle#5 from MRXLT/master
add serving part for PLSC
MRXLT authored Dec 18, 2019
2 parents 36cc759 + fec841a commit 0fa1461
Showing 47 changed files with 659 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ PLSC has the following features:
### Inference Deployment

* [Model export](docs/export_for_infer.md)
* [Using the C++ inference library]
* [Using the C++ inference library](docs/serving.md)

### Advanced Features

46 changes: 46 additions & 0 deletions docs/serving.md
@@ -0,0 +1,46 @@
# PLSC Serving

### Installation

Server side

Requires a Python 3 environment. Download the whl package:

https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/wheel/plsc_serving-0.1.4-py3-none-any.whl

pip3 install plsc_serving-0.1.4-py3-none-any.whl

Client side

Requires ujson: `pip install ujson`

Copy the [client script](./serving/client/face_service/face_service.py) to your working directory.

### Usage

Server side

Currently inference is supported only on GPU machines, and the runtime environment requires CUDA version >= 9.0.

```python
from plsc_serving.run import PLSCServer
fs = PLSCServer()
fs.with_model(model_name='face_resnet50')
# Run a single process. gpu_index selects the GPU to use (int, default 0);
# port selects the port to listen on (int, default 8866).
fs.run(gpu_index=0, port=8010)
```
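
To serve on several GPUs at once, the same class also exposes `run_multi` (defined in `serving/server/plsc_serving/run/__init__.py` in this PR); a minimal sketch:

```python
from plsc_serving.run import PLSCServer

fs = PLSCServer()
fs.with_model(model_name='face_resnet50')
# One serving process per GPU; gpu_index_list and port_list must have the
# same length. The call blocks in hold() until interrupted.
fs.run_multi(gpu_index_list=[0, 1], port_list=[8010, 8011])
```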

Client side

```python
from face_service import FaceService

with open('./data/00000000.jpg', 'rb') as f:
    image = f.read()
fc = FaceService()
# Add a server connection (str); defaults to 127.0.0.1:8010.
fc.connect('127.0.0.1:8010')
# Run inference on the server. The input is a list of samples; the return
# value is the matching embeddings, a list of shape batch_size * embedding_size.
result = fc.encode([image])
print(result[0])
fc.close()
```
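
The client can also register several servers and rotate requests among them round-robin; a sketch using `connect_all_server` from the client script, assuming two servers are running:

```python
from face_service import FaceService

fc = FaceService()
# Register both endpoints; each encode() call uses the next connection.
fc.connect_all_server(['127.0.0.1:8010', '127.0.0.1:8011'])
with open('./data/00000000.jpg', 'rb') as f:
    image = f.read()
result = fc.encode([image])
print(result[0])
fc.close()
```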
Binary file added serving/client/face_service/data/00000000.jpg
92 changes: 92 additions & 0 deletions serving/client/face_service/face_service.py
@@ -0,0 +1,92 @@
# coding:utf-8
import sys
import time
from base64 import b64encode
import ujson
import os
from PIL import Image

_ver = sys.version_info
is_py2 = (_ver[0] == 2)
is_py3 = (_ver[0] == 3)

if is_py2:
    import httplib
if is_py3:
    import http.client as httplib


class FaceService():
    def __init__(self):
        self.con_list = []
        self.con_index = 0
        self.server_list = []

    def connect(self, server='127.0.0.1:8010'):
        self.server_list.append(server)
        con = httplib.HTTPConnection(server)
        self.con_list.append(con)

    def connect_all_server(self, server_list):
        for server in server_list:
            self.server_list.append(server)
            self.con_list.append(httplib.HTTPConnection(server))

    def infer(self, request_msg):
        try:
            # Round-robin over the open connections.
            cur_con = self.con_list[self.con_index]
            cur_con.request('POST', "/FaceClassifyService/inference",
                            request_msg, {"Content-Type": "application/json"})
            response = cur_con.getresponse()
            response_msg = response.read()
            #print(response_msg)
            response_msg = ujson.loads(response_msg)
            self.con_index += 1
            self.con_index = self.con_index % len(self.con_list)
            return response_msg
        except BaseException as err:
            # Drop the failed connection and signal the caller to retry on
            # the remaining ones, or give up when none are left.
            del self.con_list[self.con_index]
            print(err)
            if len(self.con_list) == 0:
                print('All server failed')
                return 'fail'
            else:
                self.con_index = 0
                return 'retry'

    def encode(self, images):
        request = []
        # Base64-encode each image so it can be carried in the JSON body.
        for image in images:
            request.append(b64encode(image).decode('ascii'))

        request = {"base64_string": request}
        request_msg = ujson.dumps(request)

        response_msg = self.infer(request_msg)
        result = []
        for msg in response_msg["instance"]:
            result.append(msg["embedding"])

        return result

    def close(self):
        for con in self.con_list:
            con.close()


def test():
    with open('./data/00000000.jpg', 'rb') as f:
        image = f.read()
    bc = FaceService()
    bc.connect('127.0.0.1:8010')
    result = bc.encode([image])
    print(result[0])
    bc.close()


if __name__ == '__main__':
    test()
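
Note that `infer` reports failures by returning the strings `'retry'` or `'fail'` instead of raising, and `encode` does not check for them. A caller that wants failover can loop on `infer` directly; a minimal sketch, assuming two servers are running:

```python
import ujson
from base64 import b64encode

from face_service import FaceService

fc = FaceService()
fc.connect_all_server(['127.0.0.1:8010', '127.0.0.1:8011'])

with open('./data/00000000.jpg', 'rb') as f:
    request_msg = ujson.dumps(
        {"base64_string": [b64encode(f.read()).decode('ascii')]})

response = fc.infer(request_msg)
while response == 'retry':  # a dead connection was dropped; try the next one
    response = fc.infer(request_msg)
if response != 'fail':
    print(response["instance"][0]["embedding"])
fc.close()
```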
15 changes: 15 additions & 0 deletions serving/server/plsc_serving/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '0.1.4'
189 changes: 189 additions & 0 deletions serving/server/plsc_serving/run/__init__.py
@@ -0,0 +1,189 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import tarfile
import face_serving
import subprocess
import imp
import time


class PLSCServer():
    def __init__(self, with_gpu=True):
        os.chdir(self.get_path())
        self.with_gpu_flag = with_gpu
        self.p_list = []
        self.use_other_model = False
        self.run_m = False
        self.model_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/model-face'
        self.bin_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/bin-face'
        self.cpu_run_cmd = './bin/serving-cpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
        self.gpu_run_cmd = './bin/serving-gpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
        self.model_path_str = ''
        self.get_exe()

    def get_exe(self):
        # Download the serving binaries on first run, or when the local
        # version no longer matches the module version.
        exe_path = './bin'
        module_version = face_serving.__version__
        target_version_list = module_version.strip().split('.')
        target_version = target_version_list[0] + '.' + target_version_list[1]
        need_download = False

        if os.path.exists(exe_path):
            with open('./bin/serving-version.txt') as f:
                serving_version = f.read().strip()
            if serving_version != target_version:
                need_download = True
        else:
            need_download = True
        if need_download:
            tar_name = 'face-serving-' + target_version + '-bin.tar.gz'
            bin_url = self.bin_url + '/' + tar_name
            print('First time run, downloading PaddleServing components ...')
            os.system('wget ' + bin_url + ' --no-check-certificate')
            print('Decompressing files ..')
            tar = tarfile.open(tar_name)
            tar.extractall()
            tar.close()
            os.remove(tar_name)

    def modify_conf(self, gpu_index=0):
        # Rewrite the engine config in place: engine name, model path,
        # CPU/GPU engine type, and memory optimization.
        os.chdir(self.get_path())
        engine_name = 'name: "face_resnet50"'
        if not self.with_gpu_flag:
            with open('./conf/model_toolkit.prototxt', 'r') as f:
                conf_str = f.read()
            conf_str = re.sub('GPU', 'CPU', conf_str)
            conf_str = re.sub('name.*"', engine_name, conf_str)
            conf_str = re.sub('model_data_path.*"', self.model_path_str,
                              conf_str)
            conf_str = re.sub('enable_memory_optimization: 0',
                              'enable_memory_optimization: 1', conf_str)
            open('./conf/model_toolkit.prototxt', 'w').write(conf_str)
        else:
            conf_file = './conf/model_toolkit.prototxt.' + str(gpu_index)
            with open(conf_file, 'r') as f:
                conf_str = f.read()
            conf_str = re.sub('CPU', 'GPU', conf_str)
            conf_str = re.sub('name.*"', engine_name, conf_str)
            conf_str = re.sub('model_data_path.*"', self.model_path_str,
                              conf_str)
            conf_str = re.sub('enable_memory_optimization: 0',
                              'enable_memory_optimization: 1', conf_str)
            open(conf_file, 'w').write(conf_str)

    def hold(self):
        try:
            while True:
                time.sleep(60)
        except KeyboardInterrupt:
            print("Server is going to quit")
            time.sleep(5)

    def run(self, gpu_index=0, port=8866):
        os.chdir(self.get_path())
        self.modify_conf(gpu_index)

        if self.with_gpu_flag:
            gpu_msg = '--gpuid=' + str(gpu_index) + ' '
            run_cmd = self.gpu_run_cmd + gpu_msg
            run_cmd += '--port=' + str(
                port) + ' ' + '--resource_file=resource.prototxt.' + str(
                    gpu_index) + ' '
            print('Start serving on gpu ' + str(gpu_index) + ' port = ' + str(
                port))
        else:
            # Probe for a CUDA installation; fall back to the CPU binary
            # when none is found.
            probe = subprocess.Popen(
                'cat /usr/local/cuda/version.txt > tmp 2>&1', shell=True)
            probe.wait()
            if probe.returncode == 0:
                run_cmd = self.gpu_run_cmd + '--port=' + str(port) + ' '
            else:
                run_cmd = self.cpu_run_cmd + '--port=' + str(port) + ' '
            print('Start serving on cpu port = {}'.format(port))

        process = subprocess.Popen(run_cmd, shell=True)

        self.p_list.append(process)
        if not self.run_m:
            self.hold()

    def run_multi(self, gpu_index_list=[], port_list=[]):
        self.run_m = True
        if len(port_list) < 1:
            print('Please set one port at least.')
            return -1
        if self.with_gpu_flag:
            if len(gpu_index_list) != len(port_list):
                print('Expect same length of gpu_index_list and port_list.')
                return -1
            for gpu_index, port in zip(gpu_index_list, port_list):
                self.run(gpu_index=gpu_index, port=port)
        else:
            for port in port_list:
                self.run(port=port)
        self.hold()

    def stop(self):
        for p in self.p_list:
            p.kill()

    def show_conf(self):
        '''
        with open('./conf/model_toolkit.prototxt', 'r') as f:
            conf_str = f.read()
        print(conf_str)
        '''

    def with_model(self, model_name=None, model_url=None):
        '''
        if model_url != None:
            self.mode_url = model_url
            self.use_other_model = True
        '''
        if model_name is None or type(model_name) != str:
            print('Please set model name string')
            return
        os.chdir(self.get_path())
        self.get_model(model_name)

    def get_path(self):
        py_path = os.path.dirname(face_serving.__file__)
        server_path = os.path.join(py_path, 'server')
        return server_path

    def get_model(self, model_name):
        # Download and unpack the named model unless it is already present.
        server_path = self.get_path()
        tar_name = model_name + '.tar.gz'
        model_url = self.model_url + '/' + tar_name

        model_path = os.path.join(server_path, 'data/model/paddle/fluid')
        if not os.path.exists(model_path):
            os.makedirs('data/model/paddle/fluid')
        os.chdir(model_path)
        if not os.path.exists(model_name):
            os.system('wget ' + model_url + ' --no-check-certificate')
            print('Decompressing files ..')
            tar = tarfile.open(tar_name)
            tar.extractall()
            tar.close()
            os.remove(tar_name)

        self.model_path_str = r'model_data_path: "./data/model/paddle/fluid/' + model_name + r'"'
        os.chdir(self.get_path())
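
Constructing `PLSCServer(with_gpu=False)` rewrites the config to CPU engines and, when no CUDA installation is found, launches the `serving-cpu` binary instead of `serving-gpu`; a minimal usage sketch:

```python
from plsc_serving.run import PLSCServer

server = PLSCServer(with_gpu=False)    # modify_conf() switches engines to CPU
server.with_model(model_name='face_resnet50')
server.run(port=8866)                  # blocks in hold() until Ctrl-C
```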
15 changes: 15 additions & 0 deletions serving/server/plsc_serving/server/conf/cube.conf
@@ -0,0 +1,15 @@
[{
    "dict_name": "dict",
    "shard": 2,
    "dup": 1,
    "timeout": 200,
    "retry": 3,
    "backup_request": 100,
    "type": "ipport_list",
    "load_balancer": "rr",
    "nodes": [{
        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
    },{
        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
    }]
}]
2 changes: 2 additions & 0 deletions serving/server/plsc_serving/server/conf/gflags.conf
@@ -0,0 +1,2 @@
--enable_model_toolkit
--enable_cube=false
11 changes: 11 additions & 0 deletions serving/server/plsc_serving/server/conf/model_toolkit.prototxt
@@ -0,0 +1,11 @@
engines {
  name: "face_classify_model"
  type: "FLUID_GPU_ANALYSIS_DIR"
  reloadable_meta: "./data/model/paddle/fluid_time_file"
  reloadable_type: "timestamp_ne"
  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: 1
}
11 changes: 11 additions & 0 deletions serving/server/plsc_serving/server/conf/model_toolkit.prototxt.0
@@ -0,0 +1,11 @@
engines {
  name: "bert"
  type: "FLUID_GPU_ANALYSIS_DIR"
  reloadable_meta: "./data/model/paddle/fluid_time_file"
  reloadable_type: "timestamp_ne"
  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: 0
}
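
The engine name and `model_data_path` in these templates are placeholders; `modify_conf` in `plsc_serving/run/__init__.py` rewrites them before the server starts. A sketch of the same substitutions applied to this per-GPU template:

```python
import re

# The substitutions run() applies (via modify_conf) to the per-GPU template
# conf/model_toolkit.prototxt.0 before serving face_resnet50 on GPU 0.
with open('./conf/model_toolkit.prototxt.0') as f:
    conf_str = f.read()
conf_str = re.sub('CPU', 'GPU', conf_str)
conf_str = re.sub('name.*"', 'name: "face_resnet50"', conf_str)
conf_str = re.sub(
    'model_data_path.*"',
    'model_data_path: "./data/model/paddle/fluid/face_resnet50"', conf_str)
conf_str = re.sub('enable_memory_optimization: 0',
                  'enable_memory_optimization: 1', conf_str)
print(conf_str)  # engine now points at the downloaded model directory
```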