Skip to content

Benchmark of tensor serialization performance. #4610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions paddle/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# ddim lib
proto_library(framework_proto SRCS framework.proto)

cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)

cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context framework_proto)
cc_test(mytensor_test SRCS tensor_test.cc DEPS tensor glog)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)

cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor)
Expand All @@ -16,7 +18,6 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc)
cc_test(scope_test SRCS scope_test.cc DEPS scope)

proto_library(framework_proto SRCS framework.proto)

cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute)
Expand Down
12 changes: 12 additions & 0 deletions paddle/framework/framework.proto
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,15 @@ message BlockDesc {
}

message ProgramDesc { repeated BlockDesc blocks = 1; }

// Benchmark-only message (serialization option 1): the whole tensor goes
// through protobuf, with the float payload stored element-by-element in a
// repeated field.
message TTensor {
  // Element type of the payload; only FP32 is exercised by the benchmark.
  optional DataType type = 1;
  // Number of entries in `content` (written from the tensor's buffer size).
  optional int64 size = 2;
  // repeated float content = 3 [ packed = true ];
  repeated float content = 3;
}

// Benchmark-only message (serialization option 3): protobuf carries just the
// metadata header; the raw float payload is appended after it out-of-band.
message TensorMeta {
  // Element type of the payload; only FP32 is exercised by the benchmark.
  optional DataType type = 1;
  // Payload size recorded by the serializer (bytes of the tensor buffer).
  optional int64 size = 2;
}
12 changes: 12 additions & 0 deletions paddle/framework/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ class Tensor {

std::type_index type() const { return holder_->type(); }

// Benchmark-only serialization API: three alternative encodings of the
// tensor's float payload, used to compare serialization performance.

// Option 1: full protobuf message (TTensor) with a repeated float field.
std::string SerializeToString1() const;

// Rebuilds a CPU float tensor from a SerializeToString1() string.
void DeserializeFromString1(const std::string& s);

// Option 2: hand-rolled binary layout [int type][int length][raw bytes].
std::string SerializeToString2() const;

// Rebuilds a CPU float tensor from a SerializeToString2() string.
void DeserializeFromString2(const std::string& s);

// Option 3: [int proto_len][int payload_len][TensorMeta bytes][raw bytes].
std::string SerializeToString3() const;

// Rebuilds a CPU float tensor from a SerializeToString3() string.
void DeserializeFromString3(const std::string& s);

private:
template <typename T>
inline void check_memory_size() const;
Expand Down
101 changes: 101 additions & 0 deletions paddle/framework/tensor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <glog/logging.h>
#include <stdio.h>
#include <string.h>
#include <sstream>
#include "paddle/framework/framework.pb.h"
#include "paddle/memory/memcpy.h"
#include "paddle/platform/enforce.h"
// #include "paddle/platform/place.h"

namespace paddle {
namespace framework {
Expand Down Expand Up @@ -162,5 +168,100 @@ inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
return res;
}

// Option 1: serialize entirely via protobuf, storing the payload
// element-by-element in TTensor's repeated float field.
//
// `inline` is required: this is a member definition in a header included by
// multiple translation units (matching ReshapeToMatrix above).
inline std::string Tensor::SerializeToString1() const {
  TTensor proto;
  proto.set_type(DataType::FP32);
  // holder_->size() is the buffer size in BYTES (options 2/3 memcpy exactly
  // that many bytes of payload); convert to an element count before
  // iterating, otherwise we read 4x past the end of the allocation.
  const int64_t numel = static_cast<int64_t>(holder_->size() / sizeof(float));
  proto.set_size(numel);
  const float* data = static_cast<const float*>(holder_->ptr());
  for (int64_t i = 0; i < numel; ++i) {
    proto.add_content(data[i]);
  }
  return proto.SerializeAsString();
}

// Option 1: rebuild the tensor from the TTensor protobuf message produced by
// SerializeToString1(). The result is a 1-D CPU float tensor.
inline void Tensor::DeserializeFromString1(const std::string& s) {
  TTensor proto;
  proto.ParseFromString(s);
  // Size the tensor from the actual number of serialized elements rather
  // than trusting the `size` header field, so a corrupt or mismatched header
  // cannot cause a buffer overrun.
  const int64_t numel = proto.content_size();
  this->Resize({numel});
  float* dst = this->mutable_data<float>(platform::CPUPlace());
  for (int64_t i = 0; i < numel; ++i) {
    dst[i] = proto.content(i);
  }
}

// Option 2: hand-rolled binary layout, no protobuf at all:
//   [int data_type][int payload_bytes][raw float payload]
inline std::string Tensor::SerializeToString2() const {
  const int length = static_cast<int>(holder_->size());  // payload, in bytes
  const int data_type = 5;                               // 5 => float32

  // Build the string in place; the original new[]'d a buffer of
  // size+100 bytes, wrote the type with memset(buffer, 5, 4) — which stores
  // the bytes {5,5,5,5}, not the int value 5 — copied the length with
  // sizeof(size_t) (8-byte read from a 4-byte int), and returned ~92
  // trailing uninitialized bytes. All three are fixed here.
  std::string ret(sizeof(int) * 2 + length, '\0');
  char* buffer = &ret[0];
  memcpy(buffer, &data_type, sizeof(int));
  memcpy(buffer + sizeof(int), &length, sizeof(int));
  memcpy(buffer + sizeof(int) * 2, holder_->ptr(), length);
  return ret;
}

// Option 2: parse the hand-rolled [type][byte length][payload] layout
// produced by SerializeToString2() into a 1-D CPU float tensor.
inline void Tensor::DeserializeFromString2(const std::string& s) {
  int data_type = 0;  // read but currently unused (always float32)
  int length = 0;     // payload size in bytes
  const char* buffer = s.data();  // memcpy only reads; no const_cast needed
  memcpy(&data_type, buffer, sizeof(int));
  memcpy(&length, buffer + sizeof(int), sizeof(int));

  // `length` counts bytes, so the tensor holds length/sizeof(float)
  // elements; the original passed the byte count to Resize(), claiming 4x
  // too many elements (3/4 of them uninitialized).
  this->Resize({static_cast<int64_t>(length / sizeof(float))});
  float* dst = this->mutable_data<float>(platform::CPUPlace());
  memcpy(dst, buffer + sizeof(int) * 2, length);
}

// Option 3: protobuf for the metadata only, raw bytes for the payload:
//   [int proto_len][int payload_len][TensorMeta bytes][raw float payload]
inline std::string Tensor::SerializeToString3() const {
  TensorMeta proto;
  proto.set_type(DataType::FP32);
  proto.set_size(holder_->size());  // recorded in bytes
  const std::string header = proto.SerializeAsString();

  const int proto_len = static_cast<int>(header.size());
  const int buffer_len = static_cast<int>(holder_->size());

  // Assemble directly into the result string instead of a new[]'d scratch
  // buffer; this drops the const_cast and the manual delete[].
  std::string ret;
  ret.reserve(sizeof(int) * 2 + header.size() + holder_->size());
  ret.append(reinterpret_cast<const char*>(&proto_len), sizeof(int));
  ret.append(reinterpret_cast<const char*>(&buffer_len), sizeof(int));
  ret.append(header);
  ret.append(static_cast<const char*>(holder_->ptr()), holder_->size());
  return ret;
}

// Option 3: parse [proto_len][payload_len][proto bytes][raw floats] back
// into a 1-D CPU float tensor. The TensorMeta header bytes are skipped: the
// original copied them into a new[]'d buffer that was never parsed and never
// freed — a per-call memory leak doing pure dead work.
inline void Tensor::DeserializeFromString3(const std::string& s) {
  int proto_len = -1;
  int buffer_len = -1;  // payload size in bytes
  const char* buffer = s.data();  // memcpy only reads; no const_cast needed
  memcpy(&proto_len, buffer, sizeof(int));
  memcpy(&buffer_len, buffer + sizeof(int), sizeof(int));

  // buffer_len counts bytes; size the tensor by float elements.
  this->Resize({static_cast<int64_t>(buffer_len / sizeof(float))});
  float* dst = this->mutable_data<float>(platform::CPUPlace());
  memcpy(dst, buffer + sizeof(int) * 2 + proto_len, buffer_len);
}

} // namespace framework
} // namespace paddle
128 changes: 128 additions & 0 deletions paddle/framework/tensor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@

#include "paddle/framework/tensor.h"
#include <gtest/gtest.h>
#include <time.h>
#include <unistd.h>
#include <functional>
#include <string>
#include <vector>

TEST(Tensor, Dims) {
using namespace paddle::framework;
Expand Down Expand Up @@ -275,3 +279,127 @@ TEST(Tensor, ReshapeToMatrix) {
ASSERT_EQ(res.dims()[0], 2 * 3);
ASSERT_EQ(res.dims()[1], 4 * 9);
}

TEST(Tensor, option1) {
  using namespace paddle::framework;
  using namespace paddle::platform;

  // Fill a 2x3 tensor with 0..5, round-trip it through serialization
  // option 1, and verify every element survives.
  constexpr int kNumel = 2 * 3;
  Tensor src;
  float* src_ptr = src.mutable_data<float>({2, 3}, CPUPlace());
  for (int idx = 0; idx < kNumel; ++idx) {
    src_ptr[idx] = static_cast<float>(idx);
  }

  Tensor dst;
  dst.DeserializeFromString1(src.SerializeToString1());
  float* dst_ptr = dst.data<float>();

  for (int idx = 0; idx < kNumel; ++idx) {
    EXPECT_EQ(dst_ptr[idx], src_ptr[idx]);
  }
}

TEST(Tensor, option2) {
  using namespace paddle::framework;
  using namespace paddle::platform;

  // Fill a 2x3 tensor with 0..5, round-trip it through serialization
  // option 2, and verify every element survives.
  constexpr int kNumel = 2 * 3;
  Tensor src;
  float* src_ptr = src.mutable_data<float>({2, 3}, CPUPlace());
  for (int idx = 0; idx < kNumel; ++idx) {
    src_ptr[idx] = static_cast<float>(idx);
  }

  Tensor dst;
  dst.DeserializeFromString2(src.SerializeToString2());
  float* dst_ptr = dst.data<float>();

  for (int idx = 0; idx < kNumel; ++idx) {
    EXPECT_EQ(dst_ptr[idx], src_ptr[idx]);
  }
}

TEST(Tensor, option3) {
  using namespace paddle::framework;
  using namespace paddle::platform;

  // Fill a 2x3 tensor with 0..5, round-trip it through serialization
  // option 3, and verify every element survives.
  constexpr int kNumel = 2 * 3;
  Tensor src;
  float* src_ptr = src.mutable_data<float>({2, 3}, CPUPlace());
  for (int idx = 0; idx < kNumel; ++idx) {
    src_ptr[idx] = static_cast<float>(idx);
  }

  Tensor dst;
  dst.DeserializeFromString3(src.SerializeToString3());
  float* dst_ptr = dst.data<float>();

  for (int idx = 0; idx < kNumel; ++idx) {
    EXPECT_EQ(dst_ptr[idx], src_ptr[idx]);
  }
}

TEST(Tensor, TestSpeed) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  Tensor src;
  Tensor dst;

  // Square-tensor edge lengths to benchmark. arr[0] == 1 is skipped below,
  // preserving the original loop bounds (presumably a warm-up size — TODO
  // confirm that skipping it is intentional).
  const std::vector<int> arr = {1, 10, 100, 1000};
  const int STOP = 1000;
  std::vector<std::vector<double>> metric(3);

  // Run one serialize+deserialize round trip on an edge x edge zero-filled
  // tensor and record the elapsed CPU time under metric[option - 1].
  // This replaces three copy-pasted benchmark loops with one helper.
  auto bench = [&](int option, long long edge,
                   const std::function<std::string()>& serialize,
                   const std::function<void(const std::string&)>& deserialize) {
    float* src_ptr = src.mutable_data<float>({edge, edge}, CPUPlace());
    memset(src_ptr, 0, sizeof(float) * edge * edge);
    const clock_t start = clock();
    deserialize(serialize());
    // clock() measures CPU time, matching the original benchmark.
    const double seconds =
        static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
    metric[option - 1].push_back(seconds);
    LOG(INFO) << "option" << option << " : " << edge << ": cost " << seconds;
  };

  for (int round = 0; round < STOP; ++round) {
    // size_t index: the original compared a signed int against arr.size().
    for (size_t i = 1; i < arr.size(); ++i) {
      const long long edge = arr[i];
      bench(1, edge, [&] { return src.SerializeToString1(); },
            [&](const std::string& s) { dst.DeserializeFromString1(s); });
      bench(2, edge, [&] { return src.SerializeToString2(); },
            [&](const std::string& s) { dst.DeserializeFromString2(s); });
      bench(3, edge, [&] { return src.SerializeToString3(); },
            [&](const std::string& s) { dst.DeserializeFromString3(s); });
    }
  }

  // Mean of all recorded timings; 0.0 for an empty series.
  auto avg = [](const std::vector<double>& data) {
    if (data.empty()) return 0.0;
    double sum = 0.0;
    for (double v : data) sum += v;
    return sum / data.size();
  };

  for (int i = 0; i < 3; ++i) {
    // Label as option1..3 to match the per-run log lines (the original
    // summary printed option0..2 and a doubled " : " << ": cost " separator).
    LOG(INFO) << "option" << (i + 1) << " : cost " << avg(metric[i]);
  }
}