PaddlePaddle
diff --git a/‎paddle/function/BufferArg.cpp‎
Lines changed: 8 additions & 2 deletions b/‎paddle/function/BufferArg.cpp‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎paddle/function/BufferArg.h‎
Lines changed: 68 additions & 22 deletions b/‎paddle/function/BufferArg.h‎
Lines changed: 68 additions & 22 deletions
diff --git a/‎paddle/function/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎paddle/function/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎paddle/function/FunctionTest.h‎
Lines changed: 89 additions & 10 deletions b/‎paddle/function/FunctionTest.h‎
Lines changed: 89 additions & 10 deletions
@@ -32,14 +32,20 @@ const SparseMatrixArg& BufferArg::sparse() const {
 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      nnz_(sparse.getElementCnt()),
+      format_(static_cast<SparseDataFormat>(sparse.getFormat())),
+      type_(static_cast<SparseDataType>(sparse.getValueType())) {
   bufferType_ = TENSOR_SPARSE;
 }
 
 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32),
+      nnz_(sparse.getElementCnt()),
+      format_(static_cast<SparseDataFormat>(sparse.getFormat())),
+      type_(static_cast<SparseDataType>(sparse.getValueType())) {
   bufferType_ = TENSOR_SPARSE;
 }
 
 
@@ -30,13 +30,6 @@ enum BufferType {
   TENSOR_SPARSE = 4
 };
 
-enum SparseDataType {
-  SPARSE_NO_VALUE = 0,  // do not need value pointer, all values are 1
-  SPARSE_FLOAT_VALUE = 1
-};
-
-enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
-
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
@@ -79,19 +72,21 @@ class BufferArg {
   BufferArg(ValueType valueType,
             const TensorShape& shape,
             ArgType argType = UNSPECIFIED)
-      : buf_(nullptr),
-        valueType_(valueType),
-        shape_(shape),
-        argType_(argType) {}
+      : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
             ArgType argType = UNSPECIFIED)
-      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {}
+      : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
-  BufferArg(void* buf, ValueType valueType)
-      : buf_(buf), valueType_(valueType) {}
+  BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) {
+    bufferType_ = TENSOR_NORMAL;
+  }
 
   BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED)
       : buf_(
@@ -167,8 +162,9 @@ class BufferArg {
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
-  bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
+  bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; }
   bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
+  virtual size_t numElements() const { return shape_.getElements(); }
 
   const SequenceArg& sequence() const;
   const SparseMatrixArg& sparse() const;
@@ -179,6 +175,7 @@ class BufferArg {
   TensorShape shape_;
   BufferType bufferType_{TENSOR_UNKNOWN};
   ArgType argType_{UNSPECIFIED};
+  // TODO(tianbing), add deviceType_
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -191,6 +188,7 @@ class SequenceIdArg : public BufferArg {
 public:
   SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
       : BufferArg(VALUE_TYPE_INT32, shape, argType) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), (size_t)1);
     CHECK_GT(shape_[0], 1);
     numSeqs_ = shape_[0] - 1;
@@ -228,7 +226,9 @@ class SequenceArg : public BufferArg {
   SequenceArg(ValueType valueType,
               const TensorShape& shape,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
+      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }
 
   SequenceArg(void* buf,
               ValueType valueType,
@@ -269,31 +269,75 @@ class SparseMatrixArg : public BufferArg {
                   const BufferArg& row,
                   const BufferArg& col,
                   size_t nnz,
-                  SparseDataFormat format,
-                  SparseDataType type,
+                  SparseFormat format,
+                  SparseValueType type,
                   ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
         row_(row),
         col_(col),
         nnz_(nnz),
-        format_(format),
-        type_(type) {
+        format_(static_cast<SparseDataFormat>(format)),
+        type_(static_cast<SparseDataType>(type)) {
     bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);
     CHECK_EQ(col_.shape().ndims(), (size_t)1);
-    if (format == SPARSE_CSR_FORMAT) {
+    if (format_ == T_SPARSE_CSR) {
       CHECK_EQ(nnz, col.shape()[0]);
-    } else if (format == SPARSE_CSC_FORMAT) {
+    } else if (format_ == T_SPARSE_CSC) {
       CHECK_EQ(nnz, row.shape()[0]);
     }
   }
 
+  SparseMatrixArg(ValueType valueType,
+                  const TensorShape& shape,
+                  size_t nnz,
+                  SparseFormat format,
+                  SparseValueType type,
+                  ArgType argType = UNSPECIFIED)
+      : BufferArg(valueType, shape, argType),
+        row_(BufferArg(nullptr, VALUE_TYPE_INT32)),
+        col_(BufferArg(nullptr, VALUE_TYPE_INT32)),
+        nnz_(nnz),
+        format_(static_cast<SparseDataFormat>(format)),
+        type_(static_cast<SparseDataType>(type)) {
+    bufferType_ = TENSOR_SPARSE;
+    CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
+    CHECK_EQ(shape_.ndims(), (size_t)2);
+
+    /// len of row_ : height + 1 (CSR) or nnz (CSC), buf_ == nullptr
+    row_ = (format_ == T_SPARSE_CSR
+                ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[0] + 1})
+                : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz}));
+    /// len of col_ :  width + 1 (CSC) or nnz (CSR), buf_ == nullptr
+    col_ = (format_ == T_SPARSE_CSR
+                ? BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})
+                : BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[1] + 1}));
+  }
+
   SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
 
   SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED);
 
+  template <DeviceType DType>
+  typename Tensor<real, DType>::SparseMatrix SparseMatrix() const {
+    CHECK(buf_);
+    CHECK(valueType_ == DataType<real>::value);
+    // CHECK(deviceType_ == DType);
+    CHECK_EQ(2, shape_.ndims());
+    return typename Tensor<real, DType>::SparseMatrix(
+        reinterpret_cast<real*>(buf_),
+        reinterpret_cast<int*>(row_.data()),
+        reinterpret_cast<int*>(col_.data()),
+        shape_[0],
+        shape_[1],
+        nnz_,
+        static_cast<SparseValueType>(type_),
+        static_cast<SparseFormat>(format_),
+        false);
+  }
+
   ~SparseMatrixArg() {}
 
   void* getRowBuf() const { return row_.data(); }
@@ -302,6 +346,8 @@ class SparseMatrixArg : public BufferArg {
 
   size_t nnz() const { return nnz_; }
 
+  size_t numElements() const override { return nnz_; }
+
   SparseDataFormat dataFormat() const { return format_; }
 
   SparseDataType dataType() const { return type_; }
 
@@ -26,6 +26,7 @@ if(WITH_TESTING)
     add_simple_unittest(FunctionTest)
     add_simple_unittest(ContextProjectionOpTest)
     add_simple_unittest(PadOpTest)
+    add_simple_unittest(MulOpTest)
 endif()
 endif()
 
 
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "Function.h"
-#include "paddle/math/Vector.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
 #include "paddle/math/tests/TensorCheck.h"
 #include "paddle/testing/TestUtil.h"
 
@@ -69,7 +70,7 @@ class FunctionCompare {
   }
 
   // output need only contains shape, do not contains data.
-  void addOutputs(const BufferArg& output) {
+  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
     size_t size =
         output.shape().getElements() * sizeOfValuType(output.valueType());
     cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
@@ -79,12 +80,40 @@ class FunctionCompare {
         std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    argType));
     gpuOutputs_.emplace_back(
         std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
                                     output.valueType(),
                                     output.shape(),
-                                    ASSIGN_TO));
+                                    argType));
+  }
+
+  /// add and init output sparse matrix
+  void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+        output.shape()[0],
+        output.shape()[1],
+        output.nnz(),
+        static_cast<SparseValueType>(output.dataType()),
+        static_cast<SparseFormat>(output.dataFormat()));
+
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+        output.shape()[0],
+        output.shape()[1],
+        output.nnz(),
+        static_cast<SparseValueType>(output.dataType()),
+        static_cast<SparseFormat>(output.dataFormat()));
+
+    /// init sparse matrix
+    hl_stream_t stream(HPPL_STREAM_1);
+    cpuSparse_->randomizeUniform();
+    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    hl_stream_synchronize(stream);
+
+    cpuOutputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
+    gpuOutputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
   }
 
   void addInputs(const SequenceArg& input) {
@@ -107,10 +136,36 @@ class FunctionCompare {
     // TODO: need be implemented.
   }
 
+  void addInputs(const SparseMatrixArg& input) {
+    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+        input.shape()[0],
+        input.shape()[1],
+        input.nnz(),
+        static_cast<SparseValueType>(input.dataType()),
+        static_cast<SparseFormat>(input.dataFormat()));
+
+    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+        input.shape()[0],
+        input.shape()[1],
+        input.nnz(),
+        static_cast<SparseValueType>(input.dataType()),
+        static_cast<SparseFormat>(input.dataFormat()));
+
+    /// init sparse matrix
+    hl_stream_t stream(HPPL_STREAM_1);
+    cpuSparse_->randomizeUniform();
+    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    hl_stream_synchronize(stream);
+
+    cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
+    gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
+  }
+
   void run() {
     // prepare cpu/gpu arguments
     initInputs();
 
+    initOutputs();
     // function calculate
     auto callFunction = [](FunctionBase* function,
                            std::vector<BufferArgPtr>& inputs,
@@ -129,7 +184,7 @@ class FunctionCompare {
     callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
     callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
 
-    // check outputs and inouts
+    // check outputs
     compareOutputs();
   }
 
@@ -140,6 +195,10 @@ class FunctionCompare {
 protected:
   void initInputs() {
     for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      if (cpuInputs_[i]->isSparseArg()) {
+        continue;  /// sparse matrix already init
+      }
+
       initArg(*cpuInputs_[i]);
 
       // TODO: Need a BufferCopy used to copy from one BufferArg to another.
@@ -152,14 +211,32 @@ class FunctionCompare {
     }
   }
 
+  void initOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      if (cpuOutputs_[i]->isSparseArg()) {
+        continue;  /// sparse matrix already init
+      }
+
+      initArg(*cpuOutputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
+                          (real*)cpuOutputs_[i]->data());
+      GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
+                          (real*)gpuOutputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
   void compareOutputs() {
     for (size_t i = 0; i < cpuOutputs_.size(); i++) {
       // TODO, Need a BufferCheck used to compare the two buffers.
-      auto cpu = cpuOutputs_[i];
-      auto gpu = gpuOutputs_[i];
-      CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
-      GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
-
+      const auto cpu = cpuOutputs_[i];
+      const auto gpu = gpuOutputs_[i];
+      CHECK_EQ(cpu->numElements(), gpu->numElements());
+      CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
+      GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
       autotest::TensorCheckErr(cpuVector, gpuVector);
     }
   }
@@ -195,6 +272,8 @@ class FunctionCompare {
   std::vector<BufferArgPtr> cpuOutputs_;
   std::vector<BufferArgPtr> gpuInputs_;
   std::vector<BufferArgPtr> gpuOutputs_;
+  std::shared_ptr<CpuSparseMatrix> cpuSparse_;
+  std::shared_ptr<GpuSparseMatrix> gpuSparse_;
 };
 
 }  // namespace paddle