Skip to content

Commit 1946d67

Browse files
authored
Merge pull request #16313 from NHZlX/add_data_type_for_zerocopy
add data type for zero copy
2 parents b6aa4e9 + a3e51fa commit 1946d67

File tree

3 files changed

+18
-0
lines changed

3 files changed

+18
-0
lines changed

paddle/fluid/inference/anakin/engine.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
7171
const std::map<std::string, framework::LoDTensor *> &inputs,
7272
const std::map<std::string, framework::LoDTensor *> &outputs,
7373
cudaStream_t stream) {
74+
cudaDeviceSynchronize();
7475
for (const auto &input : inputs) {
7576
auto *tensor = input.second;
7677
auto *data = tensor->data<float>();

paddle/fluid/inference/api/details/zero_copy_tensor.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,19 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const {
7474
return res;
7575
}
7676

77+
// Reports the element type of the underlying tensor as a PaddleDType.
//
// Mapping performed here:
//   framework::proto::VarType::FP32  -> PaddleDType::FLOAT32
//   framework::proto::VarType::INT64 -> PaddleDType::INT64
// Any other tensor type logs an error and then falls through to the final
// return, which yields PaddleDType::FLOAT32.
//
// NOTE(review): because the fallback value is FLOAT32, a caller cannot tell an
// unsupported tensor type apart from a genuine float32 tensor by the return
// value alone; the only signal is the error log line.
PaddleDType ZeroCopyTensor::type() {
78+
// EAGER_GET_TENSOR is a macro defined outside this view; `tensor` used below
// is presumably introduced by it -- confirm against the macro definition.
EAGER_GET_TENSOR;
79+
auto type = tensor->type();
80+
if (type == framework::proto::VarType::FP32) {
81+
return PaddleDType::FLOAT32;
82+
} else if (type == framework::proto::VarType::INT64) {
83+
return PaddleDType::INT64;
84+
} else {
85+
// Unsupported type: report it, then fall through to the default return.
LOG(ERROR) << "unknown type, only support float32 and int64 now.";
86+
}
87+
// Reached only from the unsupported-type branch above.
return PaddleDType::FLOAT32;
88+
}
89+
7790
template <typename T>
7891
void ZeroCopyTensor::copy_from_cpu(const T *data) {
7992
EAGER_GET_TENSOR;
@@ -119,6 +132,7 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
119132
static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
120133
memory::Copy(platform::CPUPlace(), static_cast<void *>(data), gpu_place,
121134
t_data, ele_num * sizeof(T), dev_ctx->stream());
135+
cudaDeviceSynchronize();
122136
#else
123137
PADDLE_THROW("Not compile with CUDA, should not reach here.");
124138
#endif

paddle/fluid/inference/api/paddle_api.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ class ZeroCopyTensor {
176176
device_ = device;
177177
}
178178

179+
PaddleDType type();
180+
179181
protected:
180182
explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
181183
void SetName(const std::string& name) { name_ = name; }
@@ -190,6 +192,7 @@ class ZeroCopyTensor {
190192
// performance.
191193
mutable void* tensor_{nullptr};
192194
PaddlePlace place_;
195+
PaddleDType dtype_;
193196
int device_;
194197
};
195198

0 commit comments

Comments
 (0)