Commit 2b698b3
fix datatype
wendy12022 committed Apr 1, 2024
1 parent 5b33afd commit 2b698b3
Showing 19 changed files with 56 additions and 49 deletions.
12 changes: 6 additions & 6 deletions src/04kernel/include/kernel/attributes/moe_info.h
@@ -6,17 +6,17 @@
namespace refactor::kernel {

struct AssignPosInfo {
- uint32_t top, expert_num;
- uint32_t elementSize;
+ int64_t top, expert_num;
+ int64_t elementSize;

- AssignPosInfo(uint32_t top, uint32_t expert_num, Tensor const &gate);
+ AssignPosInfo(int64_t top, int64_t expert_num, Tensor const &gate);
};

struct ReorderInfo{
bool scatter;
- uint32_t top;
- uint32_t blockNum, blockSize;
- ReorderInfo(bool scatter, uint32_t top, TensorRefs inputs);
+ int64_t top;
+ int64_t blockNum, blockSize;
+ ReorderInfo(bool scatter, int64_t top, TensorRefs inputs);
};

}// namespace refactor::kernel
6 changes: 3 additions & 3 deletions src/04kernel/include/kernel/attributes/topk_info.h
@@ -7,12 +7,12 @@ namespace refactor::kernel {

struct TopKInfo {

- uint8_t topk;
- uint8_t axis;
+ int64_t topk;
+ int64_t axis;
size_t in_stride, in_stride_pre_axis, out_stride_pre_axis;
size_t elem_size, axis_elem_size;

- TopKInfo(uint8_t topk, uint8_t axis, Tensor const &input);
+ TopKInfo(int64_t topk, int64_t axis, Tensor const &input);
size_t getElementSize() const {return elem_size;}
size_t getAxisElementSize()const { return axis_elem_size;}
size_t getInStride()const{return in_stride;}
4 changes: 2 additions & 2 deletions src/04kernel/include/kernel/collectors/moe.h
@@ -16,8 +16,8 @@ namespace refactor::kernel {

struct ReorderCollector final : public InfoCollector {
bool scatter;
- uint32_t topk;
- constexpr ReorderCollector(decltype(_target) target, bool scatter, uint32_t topk) noexcept
+ int64_t topk;
+ constexpr ReorderCollector(decltype(_target) target, bool scatter, int64_t topk) noexcept
: InfoCollector(target) ,scatter(scatter), topk(topk){}

std::vector<KernelBox>
4 changes: 2 additions & 2 deletions src/04kernel/include/kernel/collectors/topk.h
@@ -6,9 +6,9 @@
namespace refactor::kernel {

struct TopKCollector final : public InfoCollector {
- uint32_t topk, axis;
+ int64_t topk, axis;

- constexpr TopKCollector(decltype(_target) target, uint32_t topk, uint32_t axis_) noexcept
+ constexpr TopKCollector(decltype(_target) target, int64_t topk, int64_t axis_) noexcept
: InfoCollector(target), topk(topk), axis(axis_) {}

std::vector<KernelBox>
4 changes: 2 additions & 2 deletions src/04kernel/src/attributes/moe_info.cc
@@ -3,10 +3,10 @@

namespace refactor::kernel {

- AssignPosInfo::AssignPosInfo(uint32_t top, uint32_t expert_num, Tensor const &gate):\
+ AssignPosInfo::AssignPosInfo(int64_t top, int64_t expert_num, Tensor const &gate):\
top(top), expert_num(expert_num),elementSize(gate.elementsSize()){}

- ReorderInfo::ReorderInfo(bool scatter, uint32_t top, TensorRefs inputs):\
+ ReorderInfo::ReorderInfo(bool scatter, int64_t top, TensorRefs inputs):\
scatter(scatter), top(top),blockNum(inputs[1].get().elementsSize()), blockSize(inputs[0].get().strides()[0]){}

2 changes: 1 addition & 1 deletion src/04kernel/src/attributes/topk_info.cc
@@ -3,7 +3,7 @@

namespace refactor::kernel {

- TopKInfo::TopKInfo(uint8_t topk, uint8_t axis, Tensor const &input):topk(topk),
+ TopKInfo::TopKInfo(int64_t topk, int64_t axis, Tensor const &input):topk(topk),
axis(axis),
in_stride(input.strides()[axis]),
in_stride_pre_axis(axis == 0 ? 0 : input.strides()[axis - 1]),
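
The quantities `TopKInfo` caches drop straight out of row-major strides. Below is a minimal standalone sketch of that derivation; the helper name `describe` is hypothetical, and it assumes `Tensor::strides()` is plain row-major, which is what the constructor above reads from:

```cpp
#include <cstddef>
#include <vector>

struct TopKStrides {
    size_t inStride, inStridePreAxis, elemSize, axisElemSize;
};

// Derive the fields TopKInfo caches from a shape and a reduction axis.
TopKStrides describe(std::vector<size_t> const &shape, size_t axis) {
    std::vector<size_t> strides(shape.size(), 1);
    for (size_t i = shape.size() - 1; i > 0; --i)
        strides[i - 1] = strides[i] * shape[i];// row-major strides
    size_t elems = 1;
    for (auto d : shape) elems *= d;
    return {strides[axis],                    // in_stride
            axis == 0 ? 0 : strides[axis - 1],// in_stride_pre_axis
            elems,                            // elem_size
            shape[axis]};                     // axis_elem_size
}
```

For `Shape{3, 4}` with axis 1, the configuration the CPU unit test further down uses, this yields an axis stride of 1, a pre-axis stride of 4, 12 elements in total, and 4 along the axis.
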
10 changes: 5 additions & 5 deletions src/04kernel/src/kernels/moe/cpu_kernel.cc
@@ -25,16 +25,16 @@ namespace refactor::kernel {
auto AssignPosCpu::lower(Resources &) const noexcept -> RoutineWorkspace {
using namespace runtime;
return [info = this->info](Resources &, void *workspace, void const *const *inputs, void *const *outputs) {
- auto gate = reinterpret_cast<uint8_t const *>(inputs[0]);
+ auto gate = reinterpret_cast<int64_t const *>(inputs[0]);

- auto expert_cnt = reinterpret_cast<uint8_t*>(outputs[0]);//T
- auto pos = reinterpret_cast<uint8_t*>(outputs[1]);
+ auto expert_cnt = reinterpret_cast<int64_t*>(outputs[0]);//T
+ auto pos = reinterpret_cast<int64_t*>(outputs[1]);
std::memset(expert_cnt, 0, info.expert_num);
for (size_t i = 0; i < info.elementSize; i ++){
ASSERT (gate[i] >= 0 && gate[i] < info.expert_num, "gate exceeds expert idx scope!");
expert_cnt[gate[i]] ++;
}
- std::vector<uint8_t> expert_accumlate;
+ std::vector<int64_t> expert_accumlate;
expert_accumlate.assign(info.expert_num, 0);
for (size_t i=0; i<expert_accumlate.size(); ++i){
expert_accumlate[i] = (i==0) ? expert_cnt[i] : (expert_accumlate[i-1] + expert_cnt[i]);
@@ -69,7 +69,7 @@ namespace refactor::kernel {
using namespace runtime;
return [info = this->info](Resources &, void *workspace, void const *const *inputs, void *const *outputs) {
auto input = reinterpret_cast<float const *>(inputs[0]);
- auto pos = reinterpret_cast<uint32_t const *>(inputs[1]);
+ auto pos = reinterpret_cast<int64_t const *>(inputs[1]);
auto dstVal = reinterpret_cast<float*>(outputs[0]);//T

for(size_t i = 0; i<info.blockNum; i++){
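
For orientation, here is a compilable sketch of the count/prefix-sum/scatter pattern that `AssignPosCpu` implements over the now-`int64_t` gate values. The hunk above is truncated after the prefix sum, so the final scatter loop here is an assumption about the usual MoE position-assignment scheme, and the zero-fill is sized in elements (via `assign`) rather than raw bytes:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Count tokens per expert, take an inclusive prefix sum, then scatter each
// slot's flat index into `pos` so positions routed to the same expert land
// contiguously.
void assignPos(std::vector<int64_t> const &gate, int64_t expertNum,
               std::vector<int64_t> &expertCnt, std::vector<int64_t> &pos) {
    expertCnt.assign(expertNum, 0);
    for (auto g : gate) {
        assert(g >= 0 && g < expertNum && "gate exceeds expert idx scope!");
        ++expertCnt[g];
    }
    std::vector<int64_t> accumulate(expertNum, 0);// inclusive prefix sum
    for (int64_t i = 0; i < expertNum; ++i)
        accumulate[i] = i == 0 ? expertCnt[i] : accumulate[i - 1] + expertCnt[i];
    pos.assign(gate.size(), 0);
    // Fill each expert's bucket from its upper bound downwards.
    for (auto i = static_cast<int64_t>(gate.size()) - 1; i >= 0; --i)
        pos[--accumulate[gate[i]]] = i;
}

int main() {
    std::vector<int64_t> gate{1, 0, 2, 1};// expert id per (token, top) slot
    std::vector<int64_t> cnt, pos;
    assignPos(gate, /*expertNum=*/4, cnt, pos);
    for (auto p : pos) std::cout << p << ' ';// prints: 1 0 3 2
}
```
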
6 changes: 3 additions & 3 deletions src/04kernel/src/kernels/topk/cpu_kernel.cc
@@ -29,7 +29,7 @@ namespace refactor::kernel {
auto src = reinterpret_cast<float const *>(inputs[0]);

auto dstVal = reinterpret_cast<float*>(outputs[0]);//T
- auto dstIndex = reinterpret_cast<uint32_t*>(outputs[1]);
+ auto dstIndex = reinterpret_cast<int64_t*>(outputs[1]);


size_t M = info.getElementSize() / info.getAxisElementSize();
@@ -40,7 +40,7 @@
auto outStride2 = inStride2;

for(size_t m = 0; m < M; m ++){
- using PairType = std::pair<float, uint8_t>;
+ using PairType = std::pair<float, int64_t>;
std::list<PairType> list;
for(size_t n = 0; n < N; n++){
auto srcIdx = m /inStride2 * inStride1 + m % inStride2 + n * inStride2;
@@ -49,7 +49,7 @@
list.sort([](const PairType &a, const PairType &b)->bool{return a.first > b.first;});

size_t offset = m /inStride2 * outStride1 + m % inStride2;
- std::for_each_n(list.begin(), (uint32_t)info.topk,
+ std::for_each_n(list.begin(), (int64_t)info.topk,
[&](auto &elem) {
dstVal[offset] = elem.first;
dstIndex[offset] = elem.second;
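
The kernel above keeps a `(value, index)` pair per element along the axis, sorts descending by value, and emits the first `topk` pairs with the indices now widened to `int64_t`. A self-contained sketch of the same idea, simplified to a contiguous M x N matrix with the reduction on axis 1 (the real routine walks strides so the axis need not be innermost):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Row-wise top-k: pair each value with its int64_t position on the axis,
// order descending by value, keep the first k pairs per row.
void topkRows(std::vector<float> const &src, size_t M, size_t N, size_t k,
              std::vector<float> &dstVal, std::vector<int64_t> &dstIdx) {
    dstVal.resize(M * k);
    dstIdx.resize(M * k);
    for (size_t m = 0; m < M; ++m) {
        std::vector<std::pair<float, int64_t>> row;
        for (size_t n = 0; n < N; ++n)
            row.emplace_back(src[m * N + n], static_cast<int64_t>(n));
        std::partial_sort(row.begin(), row.begin() + k, row.end(),
                          [](auto const &a, auto const &b) { return a.first > b.first; });
        for (size_t j = 0; j < k; ++j) {
            dstVal[m * k + j] = row[j].first;
            dstIdx[m * k + j] = row[j].second;
        }
    }
}

int main() {
    std::vector<float> src{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};// 3 x 4
    std::vector<float> val;
    std::vector<int64_t> idx;
    topkRows(src, 3, 4, 3, val, idx);
    for (auto v : val) std::cout << v << ' ';// 3 2 1 7 6 5 11 10 9
    std::cout << '\n';
    for (auto i : idx) std::cout << i << ' ';// 3 2 1 3 2 1 3 2 1
}
```

With the `{3, 4}` input from the unit test below, this reproduces its expected values `{3,2,1,7,6,5,11,10,9}` and indices `{3,2,1,3,2,1,3,2,1}`.
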
4 changes: 2 additions & 2 deletions src/04kernel/test/kernels/topk/test_cpu.cpp
@@ -9,7 +9,7 @@ TEST(kernel, TopKCpu) {
// build routine
auto inputTensor = Tensor::share(DataType::F32, Shape{3, 4});
auto outputTensor0 = Tensor::share(DataType::F32, Shape{3, 3});
- auto outputTensor1 = Tensor::share(DataType::U32, Shape{3, 3});
+ auto outputTensor1 = Tensor::share(DataType::I64, Shape{3, 3});

auto kernel = TopKCpu::build(TopKInfo(3,1, *inputTensor));
ASSERT_TRUE(kernel);
@@ -28,7 +28,7 @@

// check
std::vector<float> expectVal = {3,2,1,7,6,5,11,10,9};
- std::vector<uint32_t> expectIdx = {3,2,1,3,2,1,3,2,1};
+ std::vector<int64_t> expectIdx = {3,2,1,3,2,1,3,2,1};
std::for_each(out0.begin(), out0.end(),[](const float &val){std::cout<<val<<" ";});

for(size_t i=0;i< expectVal.size(); ++i){
8 changes: 4 additions & 4 deletions src/05computation/include/computation/operators/moe.h
@@ -6,9 +6,9 @@
namespace refactor::computation {

struct AssignPos final : public Operator {
- uint32_t topk,numExperts;
+ int64_t topk,numExperts;

- constexpr explicit AssignPos(uint32_t topk, uint32_t numExperts) noexcept : Operator(),
+ constexpr explicit AssignPos(int64_t topk, int64_t numExperts) noexcept : Operator(),
topk(topk), numExperts(numExperts){}

static size_t typeId() noexcept;
@@ -20,9 +20,9 @@ namespace refactor::computation {

struct Reorder final : public Operator {
bool scatter;
- uint32_t topk;
+ int64_t topk;

- constexpr explicit Reorder(bool scatter, uint32_t topk) noexcept : Operator(),
+ constexpr explicit Reorder(bool scatter, int64_t topk) noexcept : Operator(),
scatter(scatter), topk(topk){}

static size_t typeId() noexcept;
4 changes: 2 additions & 2 deletions src/05computation/include/computation/operators/topk.h
@@ -6,8 +6,8 @@
namespace refactor::computation {

struct TopK final : public Operator {
- uint32_t topk,axis;
- constexpr TopK(uint32_t topk, uint32_t axis) noexcept : topk(topk), axis(axis){}
+ int64_t topk, axis;
+ constexpr TopK(int64_t topk, int64_t axis) noexcept : topk(topk), axis(axis){}

static size_t typeId() noexcept;
size_t opTypeId() const noexcept final;
2 changes: 1 addition & 1 deletion src/07onnx/src/operators/topk.cc
@@ -40,7 +40,7 @@ namespace refactor::onnx {
auto dependencies = extractDependency(inputs);
ans[0] = Tensor::share(input.dataType, input.shape, dependencies);
ans[0]->shape[axis_] = DimExpr(topk);
- ans[1] = Tensor::share(input.dataType, input.shape, dependencies);
+ ans[1] = Tensor::share(DataType::I64, input.shape, dependencies);
ans[1]->shape[axis_] = DimExpr(topk);
return Ok(Tensors{std::move(ans)});
}
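
The corrected output type matches the ONNX TopK contract: the values output keeps the input's element type, while the indices output is always int64. A small sketch of that inference rule, using stand-in types rather than the project's `Tensor`/`DataType` machinery:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Stand-ins for illustration; the project has its own Tensor/DataType types.
enum class DataType { F32, I64 };
struct TensorMeta {
    DataType dataType;
    std::vector<int64_t> shape;
};

// ONNX TopK inference: both outputs copy the input shape with the chosen
// axis clamped to k; values keep the input dtype, indices are always I64.
std::pair<TensorMeta, TensorMeta> inferTopK(TensorMeta const &input,
                                            int64_t k, size_t axis) {
    TensorMeta values{input.dataType, input.shape};
    TensorMeta indices{DataType::I64, input.shape};
    values.shape[axis] = k;
    indices.shape[axis] = k;
    return {values, indices};
}
```
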
File renamed without changes.
2 changes: 1 addition & 1 deletion src/08-02moe/src/operators.cpp
@@ -1,4 +1,4 @@
- #include "operators.h"
+ #include "moe/operators.h"
#include "operators/moe.hh"

namespace refactor::moe {
19 changes: 12 additions & 7 deletions src/08-02moe/src/operators/moe.cc
@@ -4,7 +4,7 @@

namespace refactor::moe {

- AssignPos::AssignPos(uint32_t topk, uint32_t numExperts) : Operator() ,topk(topk), numExperts(numExperts){}
+ AssignPos::AssignPos(Int topk, Int numExperts) : Operator() ,topk(topk), numExperts(numExperts){}

auto AssignPos::build(ModelContext const &, std::string_view, Attributes attributes) -> OpBox {
auto topk = attributes["topk"].int_();
@@ -23,8 +23,10 @@ namespace refactor::moe {
EXPECT_SIZE(1)

auto const &gate = inputs[0];

- if (gate.dataType != DataType::I16) {
+ if(topk < 0 || numExperts < 0 || topk > numExperts){
+ return Err(InferError(ERROR_MSG("topk or numExperts is invalid")));
+ }
+ if (gate.dataType != DataType::I64) {
return Err(InferError(ERROR_MSG("Input data type not support")));
}

@@ -37,7 +39,7 @@ namespace refactor::moe {
return std::make_unique<Op_>(topk, numExperts);
}

- Reorder::Reorder(bool scatter, uint32_t topk, uint32_t dim) : Operator() ,scatter(scatter), top(topk), dim(dim){}
+ Reorder::Reorder(bool scatter, Int topk, Int dim) : Operator() ,scatter(scatter), top(topk), dim(dim){}

auto Reorder::build(ModelContext const &, std::string_view, Attributes attributes) -> OpBox {
auto topk = attributes["topk"].int_();
@@ -59,12 +61,15 @@
auto const &pos = inputs[1];
if (dim != 0)
return Err(InferError(ERROR_MSG("dim is not right!")));
- if(scatter && input.elementsSize() * top != pos.elementsSize())
+ if(top < 0 ){
+ return Err(InferError(ERROR_MSG("topk is invalid")));
+ }
+ if(scatter && input.elementsSize()/input.shape[input.shape.size()-1].value() * top != pos.elementsSize())
return Err(InferError(ERROR_MSG("Inputs data size are not right!")));
- else if(!scatter && input.elementsSize() != pos.elementsSize())
+ else if(!scatter && input.elementsSize()/input.shape[input.shape.size()-1].value() != pos.elementsSize())
return Err(InferError(ERROR_MSG("Inputs data size are not right!")));

- if (pos.dataType != DataType::I16) {
+ if (pos.dataType != DataType::I64) {
return Err(InferError(ERROR_MSG("Input data type not support")));
}

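
The reworked size checks compare row counts rather than raw element counts: under scatter every input row fans out to `top` positions, so `pos` must hold `rows * top` entries, while gather expects exactly one entry per row. A hedged restatement with simplified stand-in types:

```cpp
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// rows = product of all dims except the innermost (the hidden dimension),
// mirroring input.elementsSize() / input.shape[last].value() above.
bool reorderSizesOk(std::vector<int64_t> const &inputShape, int64_t posSize,
                    int64_t top, bool scatter) {
    auto elements = std::accumulate(inputShape.begin(), inputShape.end(),
                                    int64_t{1}, std::multiplies<int64_t>{});
    auto rows = elements / inputShape.back();
    return scatter ? rows * top == posSize : rows == posSize;
}
```

For example, a `{16, 512}` input with `top = 2` passes the scatter check only when `pos` carries 32 entries.
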
8 changes: 4 additions & 4 deletions src/08-02moe/src/operators/moe.hh
@@ -7,8 +7,8 @@ namespace refactor::moe {
using namespace frontend;

struct AssignPos final : public Operator {
- uint32_t topk, numExperts;
- explicit AssignPos(uint32_t topk, uint32_t numExperts);
+ Int topk, numExperts;
+ explicit AssignPos(Int topk, Int numExperts);

static OpBox build(ModelContext const &, std::string_view, Attributes);
static size_t typeId();
@@ -21,8 +21,8 @@ namespace refactor::moe {

struct Reorder final : public Operator {
bool scatter;
- uint32_t top, dim;
- explicit Reorder(bool scatter, uint32_t topk, uint32_t dim);
+ Int top, dim;
+ explicit Reorder(bool scatter, Int topk, Int dim);

static OpBox build(ModelContext const &, std::string_view, Attributes);
static size_t typeId();
6 changes: 3 additions & 3 deletions src/08-02moe/test/test_moe.cpp
@@ -1,5 +1,5 @@
#include "../src/operators/moe.hh"
- #include "operators.h"
+ #include "moe/operators.h"
#include <gtest/gtest.h>

using namespace refactor;
@@ -9,7 +9,7 @@ TEST(infer, AssignPos) {
moe::register_();
auto edges = Edges{

- {Tensor::share(DataType::I16, Shape{DimExpr(8), DimExpr(2)}, {}), ""},//gate 8*2
+ {Tensor::share(DataType::I64, Shape{DimExpr(8), DimExpr(2)}, {}), ""},//gate 8*2
};
count_t inputs[]{0};
auto infered = AssignPos(2,4).infer(TensorRefs(edges, inputs), {true});
@@ -20,6 +20,6 @@ TEST(infer, AssignPos) {
ASSERT_EQ(expert_cnt->dataType, DataType::F32);
ASSERT_EQ(expert_cnt->shape, (Shape{DimExpr(4)}));
auto pos = std::move(outputs[1]);
- ASSERT_EQ(pos->dataType, DataType::I16);
+ ASSERT_EQ(pos->dataType, DataType::I64);
ASSERT_EQ(pos->shape, (Shape{DimExpr(16)}));
}
2 changes: 1 addition & 1 deletion src/09python_ffi/CMakeLists.txt
@@ -7,7 +7,7 @@ add_subdirectory(pybind11)

file(GLOB_RECURSE PYFFI_SRC src/*.cc src/*.cpp)
pybind11_add_module(python_ffi SHARED ${PYFFI_SRC})
- target_link_libraries(python_ffi PRIVATE onnx llm communication)
+ target_link_libraries(python_ffi PRIVATE onnx llm communication moe)
target_include_directories(python_ffi PRIVATE include)

# EXAMPLE_VERSION_INFO is defined by setup.py and passed into the C++ code as a
2 changes: 2 additions & 0 deletions src/09python_ffi/src/main.cpp
@@ -3,6 +3,7 @@
#include "import.h"
#include "llm/operators.h"
#include "onnx/operators.h"
+ #include "moe/operators.h"
#include <pybind11/stl.h>// keep this line to convert stl types

namespace py = pybind11;
@@ -17,6 +18,7 @@ namespace refactor::python_ffi {
onnx::register_();
llm::register_();
communication::register_();
+ moe::register_();

// clang-format off
