Commit 9ec0a2a
orionr committed Jun 27, 2018
1 parent 290d20b
fbshipit-source-id: ba600fcd2b5cefc7621357bdeb05e24cea02e5af

Showing 69 changed files with 989 additions and 300 deletions.
5 changes: 0 additions & 5 deletions .gitattributes

This file was deleted.

49 changes: 5 additions & 44 deletions aten/src/ATen/native/Pooling.cpp
@@ -41,13 +41,10 @@ std::tuple<Tensor,Tensor> adaptive_max_pool1d(const Tensor & self, IntList outpu
return std::make_tuple(output.squeeze(2), indices.squeeze(2));
}

std::tuple<Tensor, Tensor> max_pool1d_with_indices(
const Tensor& self,
IntList kernel_size,
IntList stride,
IntList padding,
IntList dilation,
bool ceil_mode) {
std::tuple<Tensor,Tensor> max_pool1d(
const Tensor & self, IntList kernel_size, IntList stride, IntList padding,
IntList dilation, bool ceil_mode) {

if (stride.empty()) {
stride = kernel_size;
}
@@ -58,7 +55,7 @@ std::tuple<Tensor, Tensor> max_pool1d_with_indices(
check1d("max_pool1d", "dilation", dilation);

Tensor output, indices;
std::tie(output, indices) = at::max_pool2d_with_indices(
std::tie(output, indices) = at::max_pool2d(
self.unsqueeze(2),
{1, kernel_size[0]},
{1, stride[0]},
@@ -94,41 +91,5 @@ Tensor avg_pool1d(

return output.squeeze(2);
}

Tensor max_pool1d(
const Tensor& self,
IntList kernel_size,
IntList stride,
IntList padding,
IntList dilation,
bool ceil_mode) {
auto output_and_indices = at::max_pool1d_with_indices(
self, kernel_size, stride, padding, dilation, ceil_mode);
return std::get<0>(output_and_indices);
}

Tensor max_pool2d(
const Tensor& self,
IntList kernel_size,
IntList stride,
IntList padding,
IntList dilation,
bool ceil_mode) {
auto output_and_indices = at::max_pool2d_with_indices(
self, kernel_size, stride, padding, dilation, ceil_mode);
return std::get<0>(output_and_indices);
}

Tensor max_pool3d(
const Tensor& self,
IntList kernel_size,
IntList stride,
IntList padding,
IntList dilation,
bool ceil_mode) {
auto output_and_indices = at::max_pool3d_with_indices(
self, kernel_size, stride, padding, dilation, ceil_mode);
return std::get<0>(output_and_indices);
}
} // namespace native
} // namespace at
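
For context, the deleted wrappers all had the same shape: call the _with_indices variant and discard the second element of the returned tuple. A minimal, self-contained sketch of that pattern in plain C++ (pool1dWithIndices is a hypothetical stand-in for the real ATen kernel, with pooling simplified to non-overlapping windows):

#include <iostream>
#include <tuple>
#include <vector>

// Hypothetical stand-in for a pooling kernel that returns values and indices.
std::tuple<std::vector<float>, std::vector<int>> pool1dWithIndices(
    const std::vector<float>& input, size_t kernel_size) {
  std::vector<float> values;
  std::vector<int> indices;
  for (size_t start = 0; start + kernel_size <= input.size();
       start += kernel_size) {
    size_t best = start;
    for (size_t j = start + 1; j < start + kernel_size; ++j) {
      if (input[j] > input[best]) best = j;
    }
    values.push_back(input[best]);
    indices.push_back(static_cast<int>(best));
  }
  return std::make_tuple(values, indices);
}

// The wrapper pattern this commit removes: forward, then keep only the values.
std::vector<float> pool1d(const std::vector<float>& input, size_t kernel_size) {
  auto output_and_indices = pool1dWithIndices(input, kernel_size);
  return std::get<0>(output_and_indices);
}

int main() {
  for (float v : pool1d({1, 3, 2, 5, 4, 0}, 2)) {
    std::cout << v << ' ';  // prints: 3 5 4
  }
  std::cout << '\n';
}
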
11 changes: 1 addition & 10 deletions aten/src/ATen/native/native_functions.yaml
@@ -817,16 +817,7 @@

- func: max_values(Tensor self, int64_t dim, bool keepdim=false) -> Tensor

- func: max_pool1d_with_indices(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> (Tensor, Tensor)
variants: function

- func: max_pool1d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
variants: function

- func: max_pool2d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
variants: function

- func: max_pool3d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> Tensor
- func: max_pool1d(Tensor self, IntList[1] kernel_size, IntList[1] stride={}, IntList[1] padding=0, IntList[1] dilation=1, bool ceil_mode=false) -> (Tensor, Tensor)
variants: function

# FIXME: These could be combined as optional<ScalarType> but for https://github.com/pytorch/pytorch/issues/6593.
4 changes: 2 additions & 2 deletions aten/src/ATen/nn.yaml
@@ -149,12 +149,12 @@
scalar_check:
output: 'false'

- name: max_pool2d_with_indices(Tensor self, IntList[2] kernel_size, IntList[2] stride={}, IntList[2] padding=0, IntList[2] dilation=1, bool ceil_mode=false)
- name: max_pool2d(Tensor self, IntList[2] kernel_size, IntList[2] stride={}, IntList[2] padding=0, IntList[2] dilation=1, bool ceil_mode=false)
cname: SpatialDilatedMaxPooling
default_init:
stride: kernel_size

- name: max_pool3d_with_indices(Tensor self, IntList[3] kernel_size, IntList[3] stride={}, IntList[3] padding=0, IntList[3] dilation=1, bool ceil_mode=false)
- name: max_pool3d(Tensor self, IntList[3] kernel_size, IntList[3] stride={}, IntList[3] padding=0, IntList[3] dilation=1, bool ceil_mode=false)
cname: VolumetricDilatedMaxPooling
default_init:
stride: kernel_size
55 changes: 53 additions & 2 deletions caffe2/core/blob_test.cc
@@ -521,19 +521,28 @@ TEST(TensorTest, TensorNonFundamentalType) {
}
}

TEST(TensorTest, TensorNonFundamentalTypeCopy) {
TEST(TensorTest, TensorNonFundamentalTypeClone) {
TensorCPU tensor(vector<int>{2, 3, 4});
std::string* ptr = tensor.mutable_data<std::string>();
EXPECT_TRUE(ptr != nullptr);
for (int i = 0; i < tensor.size(); ++i) {
EXPECT_TRUE(ptr[i] == "");
ptr[i] = "filled";
}
TensorCPU dst_tensor(tensor);
TensorCPU dst_tensor = tensor.Clone();
const std::string* dst_ptr = dst_tensor.data<std::string>();
for (int i = 0; i < dst_tensor.size(); ++i) {
EXPECT_TRUE(dst_ptr[i] == "filled");
}
// Change the original tensor
for (int i = 0; i < tensor.size(); ++i) {
EXPECT_TRUE(ptr[i] == "filled");
ptr[i] = "changed";
}
// Confirm that the cloned tensor is not affected
for (int i = 0; i < dst_tensor.size(); ++i) {
EXPECT_TRUE(dst_ptr[i] == "filled");
}
}

TEST(TensorTest, Tensor64BitDimension) {
@@ -1060,5 +1069,47 @@ TEST(BlobTest, CastingMessage) {
}
}

TEST(TensorConstruction, UnitializedCopyTest) {
CPUContext context;
TensorCPU x;
TensorCPU y(x, &context);
TensorCPU z = x.Clone();
// should be uninitialized
EXPECT_EQ(x.size(), -1);
EXPECT_EQ(y.size(), -1);
LOG(INFO) << "z.size()" << z.size();
EXPECT_EQ(z.size(), -1);
}

TEST(TensorConstruction, CopyConstructorTest) {
CPUContext context;

TensorCPU x;
x.Resize(5);
x.mutable_data<float>()[0] = 1;
TensorCPU y = x.Clone();
TensorCPU z(x, &context);
TensorCPU w;

EXPECT_EQ(*x.data<float>(), 1);
EXPECT_EQ(*y.data<float>(), 1);
EXPECT_EQ(*z.data<float>(), 1);
x.mutable_data<float>()[0] = 5;
EXPECT_EQ(*x.data<float>(), 5);
EXPECT_EQ(*y.data<float>(), 1);
EXPECT_EQ(*z.data<float>(), 1);
}

TEST(TensorConstruction, MoveConstructorTest) {
CPUContext context;

TensorCPU x;
x.Resize(5);
x.mutable_data<float>()[0] = 1;
TensorCPU y = std::move(x);

EXPECT_EQ(*y.data<float>(), 1);
}

} // namespace
} // namespace caffe2
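
The three new construction tests pin down distinct semantics: Clone() deep-copies the buffer, the (tensor, context) constructor also yields an independent copy, and move construction transfers ownership without copying. A minimal sketch of those behaviors with a toy buffer type (plain C++, not the actual Caffe2 TensorCPU API):

#include <cassert>
#include <memory>
#include <utility>
#include <vector>

// Toy buffer-owning type with a Clone() deep copy, mirroring the tested semantics.
struct ToyTensor {
  std::shared_ptr<std::vector<float>> data;

  static ToyTensor WithSize(size_t n) {
    return ToyTensor{std::make_shared<std::vector<float>>(n, 0.f)};
  }
  // Deep copy: the clone gets its own storage.
  ToyTensor Clone() const {
    return ToyTensor{std::make_shared<std::vector<float>>(*data)};
  }
};

int main() {
  ToyTensor x = ToyTensor::WithSize(5);
  (*x.data)[0] = 1;

  ToyTensor y = x.Clone();     // independent storage
  (*x.data)[0] = 5;            // mutate the original
  assert((*y.data)[0] == 1);   // the clone is unaffected

  ToyTensor z = std::move(x);  // move: z takes over x's storage, no copy
  assert((*z.data)[0] == 5);
}
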
4 changes: 2 additions & 2 deletions caffe2/core/logging_is_google_glog.h
@@ -12,9 +12,9 @@
#include <cuda.h>
#endif

#if (!defined(__CUDACC__) || CUDA_VERSION > 9000 ) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
#if !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
#include <glog/stl_logging.h>
#else // (!defined(__CUDACC__) || CUDA_VERSION > 9000 ) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)
#else // !defined(__CUDACC__) && !defined(CAFFE2_USE_MINIMAL_GOOGLE_GLOG)

// here, we need to register a fake overload for vector/string - here,
// we just ignore the entries in the logs.
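
The comment above describes the fallback this guard selects: when glog's stl_logging.h cannot be used (e.g. under nvcc), a no-op stream overload for containers keeps LOG statements compiling while dropping the element output. A simplified sketch of that idea (not the exact Caffe2 overload):

#include <iostream>
#include <vector>

// Fallback: accept any std::vector in a log stream, but print a placeholder
// instead of the elements, so logging code compiles without stl_logging.h.
template <typename T>
std::ostream& operator<<(std::ostream& out, const std::vector<T>& /*v*/) {
  return out << "<vector>";
}

int main() {
  std::vector<int> dims{2, 3, 4};
  std::cout << "dims = " << dims << '\n';  // prints: dims = <vector>
}
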
86 changes: 85 additions & 1 deletion caffe2/core/nomnigraph/include/nomnigraph/Generated/OpClasses.h
@@ -555,13 +555,32 @@ class GivenTensorFill : public NeuralNetOperator {

class Concat : public NeuralNetOperator {
public:
Concat() : NeuralNetOperator(NNKind::Concat) {}
Concat(int axis = -1, bool addAxis = false)
: NeuralNetOperator(NNKind::Concat), Axis(axis), AddAxis(addAxis) {}

~Concat() {}

NOMNIGRAPH_DEFINE_NN_RTTI(Concat);

int getAxis() const {
return Axis;
}

bool getAddAxis() const {
return AddAxis;
}

void setAxis(int axis) {
Axis = axis;
}

void setAddAxis(bool addAxis) {
AddAxis = addAxis;
}

private:
int Axis;
bool AddAxis;
};

class Softmax : public NeuralNetOperator {
@@ -908,3 +927,68 @@ class Int8MaxPoolRelu : public NeuralNetOperator {

private:
};

class BatchMatMul : public NeuralNetOperator {
public:
BatchMatMul(bool transA = false, bool transB = true, bool broadcast = false)
: NeuralNetOperator(NNKind::BatchMatMul),
TransA(transA),
TransB(transB),
Broadcast(broadcast) {}

~BatchMatMul() {}

NOMNIGRAPH_DEFINE_NN_RTTI(BatchMatMul);

bool getTransA() const {
return TransA;
}

bool getTransB() const {
return TransB;
}

bool getBroadcast() const {
return Broadcast;
}

void setTransA(bool transA) {
TransA = transA;
}

void setTransB(bool transB) {
TransB = transB;
}

void setBroadcast(bool broadcast) {
Broadcast = broadcast;
}

private:
bool TransA;
bool TransB;
bool Broadcast;
};

class BatchGather : public NeuralNetOperator {
public:
BatchGather() : NeuralNetOperator(NNKind::BatchGather) {}

~BatchGather() {}

NOMNIGRAPH_DEFINE_NN_RTTI(BatchGather);

private:
};

class ConcatBatchMatMulBatchGatherOp : public NeuralNetOperator {
public:
ConcatBatchMatMulBatchGatherOp()
: NeuralNetOperator(NNKind::ConcatBatchMatMulBatchGatherOp) {}

~ConcatBatchMatMulBatchGatherOp() {}

NOMNIGRAPH_DEFINE_NN_RTTI(ConcatBatchMatMulBatchGatherOp);

private:
};
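
Every attributed entry in ops.def (see the changes to caffe2/core/nomnigraph/ops.def below) expands into this constructor-plus-accessor shape. A compilable sketch of the generated pattern, using a simplified stand-in for the NeuralNetOperator base and NNKind enum:

#include <cassert>

// Simplified stand-ins for the nomnigraph base types.
enum class NNKind { BatchMatMul };

class NeuralNetOperator {
 public:
  explicit NeuralNetOperator(NNKind kind) : Kind(kind) {}
  virtual ~NeuralNetOperator() = default;
  NNKind getKind() const { return Kind; }

 private:
  NNKind Kind;
};

// The shape generated from "BatchMatMul / - TransA : bool : false / ...":
// each attribute becomes a constructor default, a getter, and a setter.
class BatchMatMul : public NeuralNetOperator {
 public:
  BatchMatMul(bool transA = false, bool transB = true, bool broadcast = false)
      : NeuralNetOperator(NNKind::BatchMatMul),
        TransA(transA),
        TransB(transB),
        Broadcast(broadcast) {}

  bool getTransA() const { return TransA; }
  bool getTransB() const { return TransB; }
  bool getBroadcast() const { return Broadcast; }
  void setTransB(bool transB) { TransB = transB; }

 private:
  bool TransA;
  bool TransB;
  bool Broadcast;
};

int main() {
  BatchMatMul op;                  // attribute defaults come from ops.def
  assert(op.getTransA() == false);
  assert(op.getTransB() == true);  // note: TransB defaults to true
  op.setTransB(false);
  assert(op.getTransB() == false);
  assert(op.getKind() == NNKind::BatchMatMul);
}
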
3 changes: 2 additions & 1 deletion caffe2/core/nomnigraph/include/nomnigraph/Generated/OpEnum.h
@@ -5,4 +5,5 @@ Relu, Conv, ConvRelu, ConvTranspose, AveragePool, AveragePoolRelu, MaxPool,
Int8Conv, Int8ConvTranspose, Int8FC, Int8MaxPool, Int8Relu,
Int8GivenTensorFill, Int8Concat, Int8Softmax, Int8ChannelShuffle, Int8Sum,
Int8Add, Int8Reshape, Int8Flatten, Int8ConvRelu, Int8SumRelu,
Int8AveragePoolRelu, Int8MaxPoolRelu
Int8AveragePoolRelu, Int8MaxPoolRelu, BatchMatMul, BatchGather,
ConcatBatchMatMulBatchGatherOp
6 changes: 6 additions & 0 deletions caffe2/core/nomnigraph/include/nomnigraph/Generated/OpNames.h
@@ -84,3 +84,9 @@ case NNKind::Int8AveragePoolRelu:
return "Int8AveragePoolRelu";
case NNKind::Int8MaxPoolRelu:
return "Int8MaxPoolRelu";
case NNKind::BatchMatMul:
return "BatchMatMul";
case NNKind::BatchGather:
return "BatchGather";
case NNKind::ConcatBatchMatMulBatchGatherOp:
return "ConcatBatchMatMulBatchGatherOp";
18 changes: 18 additions & 0 deletions caffe2/core/nomnigraph/include/nomnigraph/Support/Common.h
@@ -14,6 +14,24 @@
#include <functional>
#include <list>

// These #defines are useful when writing passes as they collapse
//
// if (!cond) {
// continue; // or break; or return;
// }
//
// into a single line without negation

#define NOM_REQUIRE_OR_(_cond, _expr) \
if (!(_cond)) { \
_expr; \
}

#define NOM_REQUIRE_OR_CONT(_cond) NOM_REQUIRE_OR_(_cond, continue)
#define NOM_REQUIRE_OR_BREAK(_cond) NOM_REQUIRE_OR_(_cond, break)
#define NOM_REQUIRE_OR_RET_NULL(_cond) NOM_REQUIRE_OR_(_cond, return nullptr)
#define NOM_REQUIRE_OR_RET(_cond) NOM_REQUIRE_OR_(_cond, return )

// Implements accessors for a generic type T. If the type is not
// specified (i.e., void template type) then the partial specification
// gives an empty type.
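
A short usage sketch of these macros in a pass-style loop (the firstPositive helper is hypothetical, purely for illustration):

#include <iostream>
#include <vector>

// The macros from Support/Common.h, reproduced for a self-contained example.
#define NOM_REQUIRE_OR_(_cond, _expr) \
  if (!(_cond)) {                     \
    _expr;                            \
  }
#define NOM_REQUIRE_OR_CONT(_cond) NOM_REQUIRE_OR_(_cond, continue)
#define NOM_REQUIRE_OR_RET_NULL(_cond) NOM_REQUIRE_OR_(_cond, return nullptr)

// Pass-style helper: find the first positive value, or return null.
const int* firstPositive(const std::vector<int>& values) {
  NOM_REQUIRE_OR_RET_NULL(!values.empty());  // early exit, no negated branching
  for (const int& v : values) {
    NOM_REQUIRE_OR_CONT(v > 0);  // skip entries that fail the requirement
    return &v;
  }
  return nullptr;
}

int main() {
  std::vector<int> vals{-1, 0, 7, 3};
  if (const int* p = firstPositive(vals)) {
    std::cout << "first positive: " << *p << '\n';  // prints: first positive: 7
  }
}
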
9 changes: 9 additions & 0 deletions caffe2/core/nomnigraph/ops.def
@@ -55,6 +55,8 @@ BatchNormalization
FC
GivenTensorFill
Concat
- Axis : int : -1
- AddAxis : bool : false
Softmax
ChannelShuffle
Add
@@ -84,3 +86,10 @@ Int8ConvRelu : ConvRelu
Int8SumRelu : SumRelu
Int8AveragePoolRelu : AveragePoolRelu
Int8MaxPoolRelu : MaxPoolRelu

BatchMatMul
- TransA : bool : false
- TransB : bool : true
- Broadcast: bool : false
BatchGather
ConcatBatchMatMulBatchGatherOp
2 changes: 1 addition & 1 deletion caffe2/core/plan_executor.cc
@@ -124,7 +124,7 @@ struct WorkspaceIdInjector {
void InjectWorkspaceId(Workspace* workspace) {
if (workspace->HasBlob(NODE_ID)) {
Blob* node_id_blob = workspace->GetBlob(NODE_ID);
TensorCPU node_id_tensor = node_id_blob->template Get<TensorCPU>();
const TensorCPU& node_id_tensor = node_id_blob->template Get<TensorCPU>();
int node_id = node_id_tensor.template data<int32_t>()[0];
CAFFE_ENFORCE(
seq_ < (1 << 16),
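
This one-line change avoids materializing a full copy of the tensor just to read one element: binding a const reference to the return of Get<T>() reads in place. A minimal illustration of the difference with a toy blob (not the Caffe2 Blob API):

#include <cassert>
#include <vector>

// Toy blob that owns a buffer and hands out a const reference, like Blob::Get<T>().
struct ToyBlob {
  std::vector<int> payload{42, 7};
  const std::vector<int>& Get() const { return payload; }
};

int main() {
  ToyBlob blob;

  std::vector<int> copied = blob.Get();          // copies the whole buffer
  const std::vector<int>& ref = blob.Get();      // binds in place, no copy

  assert(copied.data() != blob.payload.data());  // the copy has its own storage
  assert(ref.data() == blob.payload.data());     // the reference aliases the blob

  int node_id = ref[0];  // read a single element, as InjectWorkspaceId does
  assert(node_id == 42);
  (void)node_id;
}
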