Commit

update benchmark infer
MrChengmo committed Jan 25, 2021
1 parent ee1caad commit 412521c
Showing 3 changed files with 48 additions and 21 deletions.
17 changes: 13 additions & 4 deletions models/recall/word2vec/benchmark/w2v_infer.py
@@ -86,22 +86,26 @@ def _load_emb(var):
def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
    """ inference function """
    epoch_model_path_list = []
    epoch_model_name_list = []

    for file in os.listdir(model_dir):
        file_path = os.path.join(model_dir, file)
        # hard code for epoch model folder
        if os.path.isdir(file_path) and is_number(file):
            epoch_model_path_list.append(file_path)
            epoch_model_name_list.append(file)

    if len(epoch_model_path_list) == 0:
        return
    epoch_model_path_list = sorted(epoch_model_path_list)
print("Save model len {}".format(len(epoch_model_path_list)))

place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
emb_size = args.emb_size
batch_size = args.batch_size
result_dict = collections.OrderedDict()

result_dict = {}
result_dict["result"] = {}

with fluid.scope_guard(fluid.Scope()):
main_program = fluid.Program()
with fluid.program_guard(main_program):
@@ -153,7 +157,12 @@ def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
print("model: {} \t acc: {} ".format(
model_path, 1.0 * accum_num / accum_num_sum))
epoch_acc = 1.0 * accum_num / accum_num_sum
result_dict[epoch] = epoch_acc
epoch_name = model_path.split("/")[-1]
result_dict["result"][epoch_name] = epoch_acc

print("infer_result_dict: {}".format(result_dict))
with open("./infer_result_dict.txt", 'w+') as f:
f.write(str(result_dict))


def BuildWord_IdMap(dict_path):
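For readers following the w2v_infer.py hunks above, here is a minimal, self-contained sketch of the recording pattern they implement: numeric epoch folders are discovered under a model directory, sorted, and each folder's metric is stored under result_dict["result"] before the whole dict is dumped to a text file. The isdigit-based is_number and the evaluate callback below are illustrative stand-ins, not the repository's actual helpers.

import os


def is_number(name):
    # Stand-in for the repo helper: treat pure-digit folder names as epoch ids.
    return name.isdigit()


def collect_epoch_models(model_dir):
    # Gather the per-epoch model folders (named "0", "1", ...) and sort them
    # numerically; list.sort() works in place and returns None, so its result
    # must never be assigned back to the variable.
    paths = [
        os.path.join(model_dir, name) for name in os.listdir(model_dir)
        if os.path.isdir(os.path.join(model_dir, name)) and is_number(name)
    ]
    paths.sort(key=lambda p: int(os.path.basename(p)))
    return paths


def record_results(model_dir, evaluate, out_path="./infer_result_dict.txt"):
    # Map each epoch folder name to its metric and dump the dict as text,
    # mirroring result_dict["result"][epoch_name] = epoch_acc above.
    result_dict = {"result": {}}
    for model_path in collect_epoch_models(model_dir):
        epoch_name = os.path.basename(model_path)  # same as split("/")[-1]
        result_dict["result"][epoch_name] = evaluate(model_path)
    with open(out_path, 'w+') as f:
        f.write(str(result_dict))
    return result_dict

Sorting with key=int avoids the lexicographic trap where "10" would sort before "2".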
21 changes: 15 additions & 6 deletions tools/static_ps_trainer.py
@@ -13,6 +13,9 @@
# limitations under the License.

from __future__ import print_function
from utils.static_ps.reader_helper import get_reader, get_example_num, get_file_list, get_word_num
from utils.static_ps.program_helper import get_model, get_strategy
from utils.static_ps.common import YamlHelper, is_distributed_env
import argparse
import time
import sys
@@ -25,9 +28,6 @@

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from utils.static_ps.common import YamlHelper, is_distributed_env
from utils.static_ps.program_helper import get_model, get_strategy
from utils.static_ps.reader_helper import get_reader, get_example_num, get_file_list, get_word_num

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
@@ -59,6 +59,8 @@ def __init__(self, config):
        self.input_data = None
        self.reader = None
        self.exe = None
        self.train_result_dict = {}
        self.train_result_dict["speed"] = []

    def run(self):
        fleet.init()
@@ -68,6 +70,7 @@ def run(self):
        elif fleet.is_worker():
            self.run_worker()
            fleet.stop_worker()
            self.record_result()
        logger.info("Run Success, Exit.")

    def network(self):
@@ -125,6 +128,7 @@ def run_worker(self):
            logger.info(
                "Epoch: {}, using time {} second, ips {} {}/sec.".format(
                    epoch, epoch_time, epoch_speed, self.count_method))
            self.train_result_dict["speed"].append(epoch_speed)

            model_dir = "{}/{}".format(save_model_path, epoch)
            if fleet.is_first_worker(
@@ -239,9 +243,9 @@ def recdataset_train_loop(self, epoch):
" avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} {}/sec".
format(train_reader_cost / print_interval, (
train_reader_cost + train_run_cost) / print_interval,
total_samples / print_interval, total_samples / (
train_reader_cost + train_run_cost),
self.count_method))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
@@ -289,6 +293,11 @@ def heter_train_loop(self, epoch):
                self.reader.reset()
                break

    def record_result(self):
        logger.info("train_result_dict: {}".format(self.train_result_dict))
        with open("./train_result_dict.txt", 'w+') as f:
            f.write(str(self.train_result_dict))


if __name__ == "__main__":
    paddle.enable_static()
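A small sketch, under stated assumptions, of the throughput bookkeeping the trainer hunks above add: each epoch's ips (examples per second) is appended to train_result_dict["speed"], and record_result() writes the dict to train_result_dict.txt when the worker finishes. The train_one_epoch callback and example_num argument are placeholders, not PaddleRec APIs.

import time


class SpeedRecorder:
    """Minimal stand-in for the trainer's train_result_dict bookkeeping."""

    def __init__(self):
        self.train_result_dict = {"speed": []}

    def run_epoch(self, epoch, train_one_epoch, example_num):
        begin = time.time()
        train_one_epoch(epoch)  # caller-supplied training step for one epoch
        epoch_time = time.time() - begin
        epoch_speed = example_num / epoch_time  # ips, examples per second
        self.train_result_dict["speed"].append(epoch_speed)
        return epoch_speed

    def record_result(self, path="./train_result_dict.txt"):
        # Dump the accumulated speeds as plain text, as the trainer does.
        with open(path, 'w+') as f:
            f.write(str(self.train_result_dict))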
31 changes: 20 additions & 11 deletions tools/utils/static_ps/infer.py
@@ -13,6 +13,9 @@
# limitations under the License.

from __future__ import print_function
from reader_helper import get_reader, get_infer_reader, get_example_num, get_file_list, get_word_num
from program_helper import get_model, get_strategy
from common import YamlHelper, is_number
import os
import numpy as np
import warnings
@@ -26,9 +29,6 @@

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from common import YamlHelper, is_number
from program_helper import get_model, get_strategy
from reader_helper import get_reader, get_infer_reader, get_example_num, get_file_list, get_word_num

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
@@ -61,7 +61,8 @@ def __init__(self, config):
        self.reader = None
        self.exe = None
        self.epoch_model_path_list = []
        self.epoch_model_name_list = []
        self.infer_result_dict = {}
        self.infer_result_dict["result"] = {}

    def run(self):
        self.network()
@@ -76,19 +77,20 @@ def run(self):
            # hard code for epoch model folder
            if os.path.isdir(file_path) and is_number(file):
                self.epoch_model_path_list.append(file_path)
                self.epoch_model_name_list.append(file)

        if len(self.epoch_model_path_list) == 0:
            self.epoch_model_path_list.append(init_model_path)
            self.epoch_model_name_list.append(init_model_path)

        self.epoch_model_path_list.sort()
        self.epoch_model_name_list.sort()
        self.epoch_model_path_list = sorted(self.epoch_model_path_list)

        for idx, model_path in enumerate(self.epoch_model_path_list):
            logger.info("Begin Infer Model {}".format(
                self.epoch_model_name_list[idx]))
            self.run_infer(model_path, self.epoch_model_name_list[idx])
                self.epoch_model_path_list[idx]))
            model_name = model_path.split("/")[-1]
            infer_res = self.run_infer(model_path, model_name)
            self.infer_result_dict["result"][model_name] = infer_res

        self.record_result()
        logger.info("Run Success, Exit.")

    def network(self):
Expand All @@ -102,7 +104,7 @@ def run_infer(self, model_path, model_name):
                dirname=model_path, executor=self.exe))

        self.reset_auc()

        infer_res = []
        for batch_id, data in enumerate(self.reader()):
            results = self.exe.run(inference_program,
                                   feed=data,
@@ -114,8 +116,10 @@
            for var_idx, var_name in enumerate(results):
                metrics_string += "Infer res: {}, ".format(results[
                    var_idx])
                infer_res.append(results[var_idx])
            logger.info("Model: {}, Batch: {}, {}".format(
                model_name, batch_id, metrics_string))
        return np.mean(infer_res)

    def init_reader(self):
        self.reader, self.file_list = get_infer_reader(self.input_data, config)
@@ -146,6 +150,11 @@ def reset_auc(self):
            tensor.set(tensor_array, paddle.CPUPlace())
            logger.info("AUC Reset To Zero: {}".format(name))

    def record_result(self):
        logger.info("infer_result_dict: {}".format(self.infer_result_dict))
        with open("./infer_result_dict.txt", 'w+') as f:
            f.write(str(self.infer_result_dict))


if __name__ == "__main__":
    paddle.enable_static()
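Finally, a hedged sketch of the aggregation that infer.py's new return value enables: per-batch metrics are collected, their mean becomes the entry for that saved model in infer_result_dict["result"], and the dict is written to infer_result_dict.txt. The run_one_batch callback below stands in for the exe.run fetches; it is not a Paddle API.

import numpy as np


def infer_one_model(batches, run_one_batch):
    # Average a per-batch metric over all batches for one saved model,
    # mirroring infer_res.append(...) followed by np.mean(infer_res) above.
    infer_res = [run_one_batch(data) for data in batches]
    return float(np.mean(infer_res))


def infer_all_models(model_paths, batches, run_one_batch,
                     out_path="./infer_result_dict.txt"):
    # Key each result by the epoch folder name, as model_path.split("/")[-1]
    # does in the hunks above, then dump the dict as plain text.
    infer_result_dict = {"result": {}}
    for model_path in model_paths:
        model_name = model_path.rstrip("/").split("/")[-1]
        infer_result_dict["result"][model_name] = infer_one_model(
            batches, run_one_batch)
    with open(out_path, 'w+') as f:
        f.write(str(infer_result_dict))
    return infer_result_dict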
