Merge pull request #3029 from alibaba/feature/support_qwen2vl

[Llm:Feature] Support Qwen2-VL export and inference.
alibaba · Sep 12, 2024 · ddd9a61
2 parents f0e516a + 9471df1
commit ddd9a61
Show file tree

Hide file tree

Showing 8 changed files with 9,137 additions and 142 deletions.
diff --git a/source/core/TensorUtils.hpp b/source/core/TensorUtils.hpp
@@ -13,7 +13,7 @@
 #include "Backend.hpp"
 #include "AutoStorage.h"
 #include "Tensor_generated.h"
-#define MNN_MAX_TENSOR_DIM 8
+#define MNN_MAX_TENSOR_DIM 9
 
 #ifdef CONSTANT
 #undef CONSTANT
@@ -187,7 +187,7 @@ class MNN_PUBLIC TensorUtils {
     static bool isTileRegion(const Tensor::InsideDescribe::Region& region);
     static bool isDepthToSpaceRegions(const Tensor* output);
     static bool reshapeSlice(Tensor::InsideDescribe::Region& slice, int outside, int inside, int axis);
-    
+
     class FuseRegionStatus;
     class MNN_PUBLIC FuseWrap {
     public:
@@ -201,10 +201,10 @@ class MNN_PUBLIC TensorUtils {
     static void adjustTensorForCompability(Tensor* t);
     static Tensor::DimensionType getDimType(const Tensor* t);
     static std::vector<float> getQuantInfo(const Tensor* t);
-    
+
     static size_t getRawSize(const Tensor* t);
     static void setRasterInputs(Command* cmd);
-    
+
     static bool refTensorContent(Tensor* dst, const Tensor* src);
 
     static int getTensorChannelPack(const Tensor* tensor);

diff --git a/transformers/llm/engine/CMakeLists.txt b/transformers/llm/engine/CMakeLists.txt
@@ -1,3 +1,11 @@
+option(LLM_SUPPORT_VISION "Llm model support vision input." OFF)
+
+if (LLM_SUPPORT_VISION)
+    add_definitions(-DLLM_SUPPORT_VISION)
+    list(APPEND MNN_DEPS MNNOpenCV)
+    include_directories(${CMAKE_SOURCE_DIR}/tools/cv/include/)
+endif()
+
 # include dir
 include_directories(${CMAKE_CURRENT_LIST_DIR}/include/)
 

diff --git a/transformers/llm/engine/include/llm/llm.hpp b/transformers/llm/engine/include/llm/llm.hpp
@@ -105,7 +105,6 @@ class MNN_PUBLIC Llm {
     virtual MNN::Express::VARP gen_attention_mask(int seq_len);
     virtual MNN::Express::VARP gen_position_ids(int seq_len);
     bool mTracing = false;
-
 };
 
 // Embedding start