Introduce NumericTensor class

mrkn · mrkn · commit 16464615e346 · 2018-10-25T06:03:42.000+09:00
This commit defines the new NumericTensor<T> class as a subclass
of the Tensor class. NumericTensor<T> extends the Tensor class by adding
a member function to access element values in a tensor.
diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
@@ -104,4 +104,56 @@ TEST(TestTensor, ZeroDimensionalTensor) {
   ASSERT_EQ(t.strides().size(), 1);
 }
 
+TEST(TestNumericTensor, ElementAccess) {
+  // 3x4 tensors built without strides, so the data is treated as row-major.
+  const std::vector<int64_t> dims{3, 4};
+
+  std::vector<int64_t> int_values{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::shared_ptr<Buffer> int_buffer{Buffer::Wrap(int_values)};
+  NumericTensor<Int64Type> int_tensor(int_buffer, dims);
+  ASSERT_EQ(1, int_tensor.Value({0, 0}));
+  ASSERT_EQ(5, int_tensor.Value({1, 0}));
+  ASSERT_EQ(6, int_tensor.Value({1, 1}));
+  ASSERT_EQ(11, int_tensor.Value({2, 2}));
+
+  // Repeat the same lookups with single-precision elements.
+  std::vector<float> float_values{1.1f, 2.1f, 3.1f, 4.1f,  5.1f,  6.1f,
+                                  7.1f, 8.1f, 9.1f, 10.1f, 11.1f, 12.1f};
+  std::shared_ptr<Buffer> float_buffer{Buffer::Wrap(float_values)};
+  NumericTensor<FloatType> float_tensor(float_buffer, dims);
+  ASSERT_EQ(1.1f, float_tensor.Value({0, 0}));
+  ASSERT_EQ(5.1f, float_tensor.Value({1, 0}));
+  ASSERT_EQ(6.1f, float_tensor.Value({1, 1}));
+  ASSERT_EQ(11.1f, float_tensor.Value({2, 2}));
+}
+
+TEST(TestNumericTensor, ElementAccessWithStrides) {
+  // Every row holds 4 elements plus 2 padding elements, giving a row stride of 6.
+  const std::vector<int64_t> dims{3, 4};
+
+  const int64_t int_width = sizeof(int64_t);
+  std::vector<int64_t> int_values{1, 2,  3,  4,  0, 0,
+                                  5, 6,  7,  8,  0, 0,
+                                  9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> int_strides{int_width * 6, int_width};
+  std::shared_ptr<Buffer> int_buffer{Buffer::Wrap(int_values)};
+  NumericTensor<Int64Type> int_tensor(int_buffer, dims, int_strides);
+  ASSERT_EQ(1, int_tensor.Value({0, 0}));
+  ASSERT_EQ(5, int_tensor.Value({1, 0}));
+  ASSERT_EQ(6, int_tensor.Value({1, 1}));
+  ASSERT_EQ(11, int_tensor.Value({2, 2}));
+
+  const int64_t float_width = sizeof(float);
+  std::vector<float> float_values{1.1f, 2.1f,  3.1f,  4.1f,  0.0f, 0.0f,
+                                  5.1f, 6.1f,  7.1f,  8.1f,  0.0f, 0.0f,
+                                  9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> float_strides{float_width * 6, float_width};
+  std::shared_ptr<Buffer> float_buffer{Buffer::Wrap(float_values)};
+  NumericTensor<FloatType> float_tensor(float_buffer, dims, float_strides);
+  ASSERT_EQ(1.1f, float_tensor.Value({0, 0}));
+  ASSERT_EQ(5.1f, float_tensor.Value({1, 0}));
+  ASSERT_EQ(6.1f, float_tensor.Value({1, 1}));
+  ASSERT_EQ(11.1f, float_tensor.Value({2, 2}));
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
@@ -26,6 +26,7 @@
 
 #include "arrow/compare.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 
@@ -121,4 +122,58 @@ Type::type Tensor::type_id() const { return type_->id(); }
 
 bool Tensor::Equals(const Tensor& other) const { return TensorEquals(*this, other); }
 
+// ----------------------------------------------------------------------
+// NumericTensor
+
+// Builds a tensor from data and shape only; empty strides and empty dimension
+// names are forwarded to the four-argument constructor.
+template <typename TYPE>
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape)
+    : NumericTensor(data, shape, std::vector<int64_t>(), std::vector<std::string>()) {}
+
+// Builds a tensor with explicit strides; an empty list of dimension names is
+// forwarded to the four-argument constructor.
+template <typename TYPE>
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape,
+                                   const std::vector<int64_t>& strides)
+    : NumericTensor(data, shape, strides, std::vector<std::string>()) {}
+
+// Most general constructor: passes the type singleton of the element type,
+// together with all arguments unchanged, to the Tensor base class.
+template <typename TYPE>
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape,
+                                   const std::vector<int64_t>& strides,
+                                   const std::vector<std::string>& dim_names)
+    : Tensor(TypeTraits<TypeClass>::type_singleton(), data, shape, strides,
+             dim_names) {}
+
+// Computes the offset, in bytes, of the element addressed by `index`.
+//
+// When strides_ is non-empty the offset is the sum of index[i] * strides_[i].
+// Otherwise the element position is derived from shape_ in row-major order and
+// then scaled by the size of one element.
+//
+// No bounds checking is performed: `index` must not have more entries than
+// strides_ (or than shape_ when strides_ is empty).
+template <typename TYPE>
+int64_t NumericTensor<TYPE>::CalculateValueOffset(
+    const std::vector<int64_t>& index) const {
+  const size_t rank = index.size();
+
+  if (strides_.size() == 0) {
+    // Row-major position counted in elements, accumulated Horner-style.
+    int64_t element_offset = 0;
+    for (size_t dim = 0; dim < rank; ++dim) {
+      element_offset = element_offset * shape_[dim] + index[dim];
+    }
+    return element_offset * static_cast<int64_t>(sizeof(value_type));
+  }
+
+  // The strides are added as they are, without scaling by the element size.
+  int64_t byte_offset = 0;
+  for (size_t dim = 0; dim < rank; ++dim) {
+    byte_offset += index[dim] * strides_[dim];
+  }
+  return byte_offset;
+}
+
+// ----------------------------------------------------------------------
+// Instantiate templates
+
+// The NumericTensor constructors and CalculateValueOffset are defined in this
+// file, so the template is instantiated here for each listed element type.
+
+// Signed integers
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int8Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int16Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int32Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int64Type>;
+
+// Unsigned integers
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt8Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt16Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt32Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt64Type>;
+
+// Floating point
+template class ARROW_TEMPLATE_EXPORT NumericTensor<HalfFloatType>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<FloatType>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<DoubleType>;
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
@@ -62,7 +62,7 @@ class ARROW_EXPORT Tensor {
   Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
          const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);
 
-  /// Constructor with strides and dimension names
+  /// Constructor with non-negative strides and dimension names
   Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
          const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
          const std::vector<std::string>& dim_names);
@@ -114,6 +114,34 @@ class ARROW_EXPORT Tensor {
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
 };
 
+/// Tensor whose elements all have the C type associated with TYPE.
+///
+/// Adds typed access to individual elements on top of Tensor.
+template <typename TYPE>
+class ARROW_EXPORT NumericTensor : public Tensor {
+ public:
+  using TypeClass = TYPE;
+  using value_type = typename TypeClass::c_type;
+
+  /// Construct from data and shape only, without strides or dimension names;
+  /// the data is assumed to be row-major
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape);
+
+  /// Construct with explicit non-negative strides
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                const std::vector<int64_t>& strides);
+
+  /// Construct with explicit non-negative strides and dimension names
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                const std::vector<int64_t>& strides,
+                const std::vector<std::string>& dim_names);
+
+  /// Return a reference to the element stored at the given index.
+  ///
+  /// The element is read at raw_data() plus the offset computed by
+  /// CalculateValueOffset(); the index is not validated here.
+  const value_type& Value(const std::vector<int64_t>& index) const {
+    const int64_t byte_offset = CalculateValueOffset(index);
+    return *reinterpret_cast<const value_type*>(raw_data() + byte_offset);
+  }
+
+ protected:
+  /// Offset of the element at the given index, relative to raw_data()
+  int64_t CalculateValueOffset(const std::vector<int64_t>& index) const;
+};
+
 }  // namespace arrow
 
 #endif  // ARROW_TENSOR_H