
Merge pull request PaddlePaddle#5 from MRXLT/master
add serving part for PLSC
MRXLT authored Dec 18, 2019
2 parents 36cc759 + fec841a commit 0fa1461
Showing 47 changed files with 659 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ PLSC has the following features:
### Inference Deployment

* [Model export](docs/export_for_infer.md)
* [Using the C++ inference library]
* [Using the C++ inference library](docs/serving.md)

### Advanced Features

46 changes: 46 additions & 0 deletions docs/serving.md
@@ -0,0 +1,46 @@
# PLSC Serving

### Installation

Server side

Requires a Python 3 environment. Download the whl package:

https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/wheel/plsc_serving-0.1.4-py3-none-any.whl

pip3 install plsc_serving-0.1.4-py3-none-any.whl

Client side

Requires ujson: `pip install ujson`

Copy the [client script](./serving/client/face_service/face_service.py) to your working directory.

### Usage

Server side

Currently inference is supported only on GPU machines, and the runtime environment requires CUDA version >= 9.0.

```python
from plsc_serving.run import PLSCServer
fs = PLSCServer()
fs.with_model(model_name='face_resnet50')
# Run a single process. gpu_index selects the GPU to use (int, default 0);
# port selects the port to listen on (int, default 8866).
fs.run(gpu_index=0, port=8010)
```
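
To serve on several GPUs at once, the same class also exposes `run_multi` (defined in `serving/server/plsc_serving/run/__init__.py` in this PR); a minimal sketch:

```python
from plsc_serving.run import PLSCServer

fs = PLSCServer()
fs.with_model(model_name='face_resnet50')
# One serving process per GPU; gpu_index_list and port_list must have the
# same length. The call blocks in hold() until interrupted.
fs.run_multi(gpu_index_list=[0, 1], port_list=[8010, 8011])
```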

Client side

```python
from face_service import FaceService

with open('./data/00000000.jpg', 'rb') as f:
    image = f.read()
fc = FaceService()
# Add a server connection (str); defaults to 127.0.0.1:8010.
fc.connect('127.0.0.1:8010')
# Run inference on the server. The input is a list of samples; the return
# value is the matching embeddings, a list of shape batch_size * embedding_size.
result = fc.encode([image])
print(result[0])
fc.close()
```
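
The client can also register several servers and rotate requests among them round-robin; a sketch using `connect_all_server` from the client script, assuming two servers are running:

```python
from face_service import FaceService

fc = FaceService()
# Register both endpoints; each encode() call uses the next connection.
fc.connect_all_server(['127.0.0.1:8010', '127.0.0.1:8011'])
with open('./data/00000000.jpg', 'rb') as f:
    image = f.read()
result = fc.encode([image])
print(result[0])
fc.close()
```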
Binary file added serving/client/face_service/data/00000000.jpg
92 changes: 92 additions & 0 deletions serving/client/face_service/face_service.py
@@ -0,0 +1,92 @@
# coding:utf-8
import sys
import time
from base64 import b64encode
import ujson
import os
from PIL import Image

_ver = sys.version_info
is_py2 = (_ver[0] == 2)
is_py3 = (_ver[0] == 3)

if is_py2:
    import httplib
if is_py3:
    import http.client as httplib


class FaceService():
    def __init__(self):
        self.con_list = []
        self.con_index = 0
        self.server_list = []

    def connect(self, server='127.0.0.1:8010'):
        self.server_list.append(server)
        con = httplib.HTTPConnection(server)
        self.con_list.append(con)

    def connect_all_server(self, server_list):
        for server in server_list:
            self.server_list.append(server)
            self.con_list.append(httplib.HTTPConnection(server))

    def infer(self, request_msg):
        try:
            # Round-robin over the open connections.
            cur_con = self.con_list[self.con_index]
            cur_con.request('POST', "/FaceClassifyService/inference",
                            request_msg, {"Content-Type": "application/json"})
            response = cur_con.getresponse()
            response_msg = response.read()
            #print(response_msg)
            response_msg = ujson.loads(response_msg)
            self.con_index += 1
            self.con_index = self.con_index % len(self.con_list)
            return response_msg
        except BaseException as err:
            # Drop the failed connection and signal the caller to retry on
            # the remaining ones, or give up when none are left.
            del self.con_list[self.con_index]
            print(err)
            if len(self.con_list) == 0:
                print('All server failed')
                return 'fail'
            else:
                self.con_index = 0
                return 'retry'

    def encode(self, images):
        request = []
        # Base64-encode each image so it can be carried in the JSON body.
        for image in images:
            request.append(b64encode(image).decode('ascii'))

        request = {"base64_string": request}
        request_msg = ujson.dumps(request)

        response_msg = self.infer(request_msg)
        result = []
        for msg in response_msg["instance"]:
            result.append(msg["embedding"])

        return result

    def close(self):
        for con in self.con_list:
            con.close()


def test():
    with open('./data/00000000.jpg', 'rb') as f:
        image = f.read()
    bc = FaceService()
    bc.connect('127.0.0.1:8010')
    result = bc.encode([image])
    print(result[0])
    bc.close()


if __name__ == '__main__':
    test()
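
Note that `infer` reports failures by returning the strings `'retry'` or `'fail'` instead of raising, and `encode` does not check for them. A caller that wants failover can loop on `infer` directly; a minimal sketch, assuming two servers are running:

```python
import ujson
from base64 import b64encode

from face_service import FaceService

fc = FaceService()
fc.connect_all_server(['127.0.0.1:8010', '127.0.0.1:8011'])

with open('./data/00000000.jpg', 'rb') as f:
    request_msg = ujson.dumps(
        {"base64_string": [b64encode(f.read()).decode('ascii')]})

response = fc.infer(request_msg)
while response == 'retry':  # a dead connection was dropped; try the next one
    response = fc.infer(request_msg)
if response != 'fail':
    print(response["instance"][0]["embedding"])
fc.close()
```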
15 changes: 15 additions & 0 deletions serving/server/plsc_serving/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '0.1.4'
189 changes: 189 additions & 0 deletions serving/server/plsc_serving/run/__init__.py
@@ -0,0 +1,189 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import tarfile
import face_serving
import subprocess
import imp
import time


class PLSCServer():
    def __init__(self, with_gpu=True):
        os.chdir(self.get_path())
        self.with_gpu_flag = with_gpu
        self.p_list = []
        self.use_other_model = False
        self.run_m = False
        self.model_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/model-face'
        self.bin_url = 'https://paddle-serving.bj.bcebos.com/paddle-gpu-serving/bin-face'
        self.cpu_run_cmd = './bin/serving-cpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
        self.gpu_run_cmd = './bin/serving-gpu --bthread_min_concurrency=4 --bthread_concurrency=4 --logtostderr=true '
        self.model_path_str = ''
        self.get_exe()

    def get_exe(self):
        # Download the serving binaries on first run, or when the local
        # version no longer matches the module version.
        exe_path = './bin'
        module_version = face_serving.__version__
        target_version_list = module_version.strip().split('.')
        target_version = target_version_list[0] + '.' + target_version_list[1]
        need_download = False

        if os.path.exists(exe_path):
            with open('./bin/serving-version.txt') as f:
                serving_version = f.read().strip()
            if serving_version != target_version:
                need_download = True
        else:
            need_download = True
        if need_download:
            tar_name = 'face-serving-' + target_version + '-bin.tar.gz'
            bin_url = self.bin_url + '/' + tar_name
            print('First time run, downloading PaddleServing components ...')
            os.system('wget ' + bin_url + ' --no-check-certificate')
            print('Decompressing files ..')
            tar = tarfile.open(tar_name)
            tar.extractall()
            tar.close()
            os.remove(tar_name)

    def modify_conf(self, gpu_index=0):
        # Rewrite the engine config in place: engine name, model path,
        # CPU/GPU engine type, and memory optimization.
        os.chdir(self.get_path())
        engine_name = 'name: "face_resnet50"'
        if not self.with_gpu_flag:
            with open('./conf/model_toolkit.prototxt', 'r') as f:
                conf_str = f.read()
            conf_str = re.sub('GPU', 'CPU', conf_str)
            conf_str = re.sub('name.*"', engine_name, conf_str)
            conf_str = re.sub('model_data_path.*"', self.model_path_str,
                              conf_str)
            conf_str = re.sub('enable_memory_optimization: 0',
                              'enable_memory_optimization: 1', conf_str)
            open('./conf/model_toolkit.prototxt', 'w').write(conf_str)
        else:
            conf_file = './conf/model_toolkit.prototxt.' + str(gpu_index)
            with open(conf_file, 'r') as f:
                conf_str = f.read()
            conf_str = re.sub('CPU', 'GPU', conf_str)
            conf_str = re.sub('name.*"', engine_name, conf_str)
            conf_str = re.sub('model_data_path.*"', self.model_path_str,
                              conf_str)
            conf_str = re.sub('enable_memory_optimization: 0',
                              'enable_memory_optimization: 1', conf_str)
            open(conf_file, 'w').write(conf_str)

    def hold(self):
        try:
            while True:
                time.sleep(60)
        except KeyboardInterrupt:
            print("Server is going to quit")
            time.sleep(5)

    def run(self, gpu_index=0, port=8866):
        os.chdir(self.get_path())
        self.modify_conf(gpu_index)

        if self.with_gpu_flag:
            gpu_msg = '--gpuid=' + str(gpu_index) + ' '
            run_cmd = self.gpu_run_cmd + gpu_msg
            run_cmd += '--port=' + str(
                port) + ' ' + '--resource_file=resource.prototxt.' + str(
                    gpu_index) + ' '
            print('Start serving on gpu ' + str(gpu_index) + ' port = ' + str(
                port))
        else:
            # Probe for a CUDA installation; fall back to the CPU binary
            # when none is found.
            probe = subprocess.Popen(
                'cat /usr/local/cuda/version.txt > tmp 2>&1', shell=True)
            probe.wait()
            if probe.returncode == 0:
                run_cmd = self.gpu_run_cmd + '--port=' + str(port) + ' '
            else:
                run_cmd = self.cpu_run_cmd + '--port=' + str(port) + ' '
            print('Start serving on cpu port = {}'.format(port))

        process = subprocess.Popen(run_cmd, shell=True)

        self.p_list.append(process)
        if not self.run_m:
            self.hold()

    def run_multi(self, gpu_index_list=[], port_list=[]):
        self.run_m = True
        if len(port_list) < 1:
            print('Please set one port at least.')
            return -1
        if self.with_gpu_flag:
            if len(gpu_index_list) != len(port_list):
                print('Expect same length of gpu_index_list and port_list.')
                return -1
            for gpu_index, port in zip(gpu_index_list, port_list):
                self.run(gpu_index=gpu_index, port=port)
        else:
            for port in port_list:
                self.run(port=port)
        self.hold()

    def stop(self):
        for p in self.p_list:
            p.kill()

    def show_conf(self):
        '''
        with open('./conf/model_toolkit.prototxt', 'r') as f:
            conf_str = f.read()
        print(conf_str)
        '''

    def with_model(self, model_name=None, model_url=None):
        '''
        if model_url != None:
            self.mode_url = model_url
            self.use_other_model = True
        '''
        if model_name is None or type(model_name) != str:
            print('Please set model name string')
            return
        os.chdir(self.get_path())
        self.get_model(model_name)

    def get_path(self):
        py_path = os.path.dirname(face_serving.__file__)
        server_path = os.path.join(py_path, 'server')
        return server_path

    def get_model(self, model_name):
        # Download and unpack the named model unless it is already present.
        server_path = self.get_path()
        tar_name = model_name + '.tar.gz'
        model_url = self.model_url + '/' + tar_name

        model_path = os.path.join(server_path, 'data/model/paddle/fluid')
        if not os.path.exists(model_path):
            os.makedirs('data/model/paddle/fluid')
        os.chdir(model_path)
        if not os.path.exists(model_name):
            os.system('wget ' + model_url + ' --no-check-certificate')
            print('Decompressing files ..')
            tar = tarfile.open(tar_name)
            tar.extractall()
            tar.close()
            os.remove(tar_name)

        self.model_path_str = r'model_data_path: "./data/model/paddle/fluid/' + model_name + r'"'
        os.chdir(self.get_path())
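
Constructing `PLSCServer(with_gpu=False)` rewrites the config to CPU engines and, when no CUDA installation is found, launches the `serving-cpu` binary instead of `serving-gpu`; a minimal usage sketch:

```python
from plsc_serving.run import PLSCServer

server = PLSCServer(with_gpu=False)    # modify_conf() switches engines to CPU
server.with_model(model_name='face_resnet50')
server.run(port=8866)                  # blocks in hold() until Ctrl-C
```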
15 changes: 15 additions & 0 deletions serving/server/plsc_serving/server/conf/cube.conf
@@ -0,0 +1,15 @@
[{
    "dict_name": "dict",
    "shard": 2,
    "dup": 1,
    "timeout": 200,
    "retry": 3,
    "backup_request": 100,
    "type": "ipport_list",
    "load_balancer": "rr",
    "nodes": [{
        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
    },{
        "ipport_list": "list://xxx.xxx.xxx.xxx:8000"
    }]
}]
2 changes: 2 additions & 0 deletions serving/server/plsc_serving/server/conf/gflags.conf
@@ -0,0 +1,2 @@
--enable_model_toolkit
--enable_cube=false
11 changes: 11 additions & 0 deletions serving/server/plsc_serving/server/conf/model_toolkit.prototxt
@@ -0,0 +1,11 @@
engines {
  name: "face_classify_model"
  type: "FLUID_GPU_ANALYSIS_DIR"
  reloadable_meta: "./data/model/paddle/fluid_time_file"
  reloadable_type: "timestamp_ne"
  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: 1
}
11 changes: 11 additions & 0 deletions serving/server/plsc_serving/server/conf/model_toolkit.prototxt.0
@@ -0,0 +1,11 @@
engines {
  name: "bert"
  type: "FLUID_GPU_ANALYSIS_DIR"
  reloadable_meta: "./data/model/paddle/fluid_time_file"
  reloadable_type: "timestamp_ne"
  model_data_path: "/home/xulongteng/.paddlehub/bert_service/bert_chinese_L-12_H-768_A-12"
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: 0
}
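
The engine name and `model_data_path` in these templates are placeholders; `modify_conf` in `plsc_serving/run/__init__.py` rewrites them before the server starts. A sketch of the same substitutions applied to this per-GPU template:

```python
import re

# The substitutions run() applies (via modify_conf) to the per-GPU template
# conf/model_toolkit.prototxt.0 before serving face_resnet50 on GPU 0.
with open('./conf/model_toolkit.prototxt.0') as f:
    conf_str = f.read()
conf_str = re.sub('CPU', 'GPU', conf_str)
conf_str = re.sub('name.*"', 'name: "face_resnet50"', conf_str)
conf_str = re.sub(
    'model_data_path.*"',
    'model_data_path: "./data/model/paddle/fluid/face_resnet50"', conf_str)
conf_str = re.sub('enable_memory_optimization: 0',
                  'enable_memory_optimization: 1', conf_str)
print(conf_str)  # engine now points at the downloaded model directory
```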