Merge pull request #2676 from wangzhaode/featue/sync_2.8.0
[MNN:Sync] Sync Internal 2.8.0
wangzhaode authored Dec 4, 2023
2 parents 8d5d8b8 + 387775b commit 085e9d3
Showing 347 changed files with 34,680 additions and 12,043 deletions.
9 changes: 8 additions & 1 deletion CMakeLists.txt
@@ -43,6 +43,7 @@ option(MNN_SUPPORT_DEPRECATED_OP "Enable MNN's tflite quantized op" ON)
option(MNN_DEBUG_MEMORY "MNN Debug Memory Access" OFF)
option(MNN_DEBUG_TENSOR_SIZE "Enable Tensor Size" OFF)
option(MNN_GPU_TRACE "Enable MNN Gpu Debug" OFF)
option(MNN_SUPPORT_RENDER "Enable MNN Render Ops" OFF)
option(MNN_PORTABLE_BUILD "Link the static version of third party libraries where possible to improve the portability of built executables" OFF)
option(MNN_SEP_BUILD "Build MNN Backends and expression separately. Only works with MNN_BUILD_SHARED_LIBS=ON" ON)
option(NATIVE_LIBRARY_OUTPUT "Native Library Path" OFF)
@@ -162,6 +163,9 @@ endif()
if(MNN_SUPPORT_DEPRECATED_OP)
add_definitions(-DMNN_SUPPORT_DEPRECATED_OP)
endif()
if(MNN_SUPPORT_RENDER)
add_definitions(-DMNN_SUPPORT_RENDER)
endif()

# debug options
if(MNN_DEBUG_MEMORY)
@@ -372,7 +376,7 @@ list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNMath>)
list(APPEND MNN_TARGETS MNNMath)

# Transform
FILE(GLOB MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
FILE(GLOB_RECURSE MNN_Transform_SRC ${CMAKE_CURRENT_LIST_DIR}/source/shape/* ${CMAKE_CURRENT_LIST_DIR}/source/geometry/*)
add_library(MNNTransform OBJECT ${MNN_Transform_SRC})
IF (NOT MNN_BUILD_MINI)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTransform>)
@@ -601,9 +605,12 @@ IF(MNN_BUILD_TRAIN OR MNN_BUILD_QUANTOOLS)
add_subdirectory(tools/train)
IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNNTrain)
list(APPEND MNN_DEPS MNNTrainUtils)
ELSE()
list(APPEND MNN_TARGETS MNNTrain)
list(APPEND MNN_TARGETS MNNTrainUtils)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTrain>)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNTrainUtils>)
ENDIF()
ENDIF()

4 changes: 1 addition & 3 deletions docs/compile/cmake.md
@@ -63,11 +63,8 @@ MNN is built with CMake; the CMake macro options are listed below:
| MNN_VULKAN_DEBUG | Whether to enable Vulkan DEBUG mode; takes effect only when `MNN_VULKAN=ON`; defaults to `OFF` |
| MNN_OPENGL_REGEN | Whether to regenerate the OpenGL kernels; takes effect only when `MNN_OPENGL=ON`; defaults to `OFF` |
| MNN_TRT_DYNAMIC | Whether to load the TRT dynamic library via dlopen; takes effect only when `MNN_TENSORRT=ON`; defaults to `OFF` |
| TF_CONVERT_ORIGIN | Whether the built `MNNConvert` uses the original TF conversion mode; takes effect only when `MNN_BUILD_CONVERTER=ON`; defaults to `OFF` |
| TFMODEL_OPTIMIZE | Whether the built `MNNConvert` optimizes TensorFlow models; takes effect only when `MNN_BUILD_CONVERTER=ON`; defaults to `OFF` |
| MNN_BUILD_TORCH | Whether the built `MNNConvert` supports `TorchScript`; takes effect only when `MNN_BUILD_CONVERTER=ON`; defaults to `OFF` |
| MNN_TRAIN_DEBUG | Whether the built training module supports debugging; takes effect only when `MNN_BUILD_TRAIN=ON`; defaults to `OFF` |
| MNN_BUILD_TRAIN_MINI | Build a trimmed training module without `Dataset` and `model`; takes effect only when `MNN_BUILD_TRAIN=ON`; defaults to `OFF` |
| MNN_USE_OPENCV | Whether the built training demos depend on `OpenCV`; takes effect only when `MNN_BUILD_TRAIN=ON`; defaults to `OFF` |
| MNN_IMGPROC_COLOR | Whether MNN's OpenCV module enables `color space conversion`; defaults to `ON` |
| MNN_IMGPROC_GEOMETRIC | Whether MNN's OpenCV module enables `geometric transforms`; defaults to `ON` |
@@ -83,4 +80,5 @@ MNN is built with CMake; the CMake macro options are listed below:
| MNN_OPENCV_BENCH | Whether MNN's OpenCV module enables performance benchmarking; defaults to `OFF` |
| MNN_VULKAN_IMAGE | Whether MNN's Vulkan backend uses the Image memory mode, enabling FP16 and acceleration on some mobile GPUs; defaults to `ON` |
| MNN_LOW_MEMORY | Whether to support low-memory mode; with it enabled, a weight-quantized model run with `low_memory` set dequantizes at compute time; defaults to `OFF` |
| MNN_SUPPORT_RENDER | Whether to support rendering-related operators; defaults to `OFF` |
| MNN_BUILD_LLM | Whether to build the MNN-based llm library and demo; defaults to `OFF` |
6 changes: 3 additions & 3 deletions docs/compile/pymnn.md
@@ -2,8 +2,8 @@
## Local installation
```bash
cd /path/to/MNN/pymnn/pip_package
python build_deps.py
python setup.py install --version {MNN version}
python build_deps.py {MNN deps} # any combination of internal,cuda,trt,cuda_tune,opencl,vulkan,render,no_sse,torch, e.g. "cuda,render,no_sse"
python setup.py install --version {MNN version} --deps {MNN deps}
```
## Building the Python wheel package
- Linux
@@ -41,4 +41,4 @@ python setup.py install --version {MNN version}
.\package_scripts\win\build_whl.ps1 -version {MNN version} -backends "opencl,vulkan" -path MNN-CPU-OPENCL/py_whl/x64 -pyenvs "py27,py37,py38,py39"
# CPU+OpenCL+Vulkan, 32-bit build
.\package_scripts\win\build_whl.ps1 -version {MNN version} -backends "opencl,vulkan" -x86 -path MNN-CPU-OPENCL/py_whl/x86 -pyenvs "py27-win32,py37-win32,py38-win32,py39-win32"
```
```
6 changes: 2 additions & 4 deletions docs/compile/tools.md
@@ -29,10 +29,8 @@
- Build artifacts
- `MNNTrain` training framework library
- `runTrainDemo.out` entry program for the training framework demos
- `transformer.out` training model converter
- `train.out` entry program for training
- `rawDataTransform.out` converts JSON files to FlatBuffers files
- `dataTransformer.out` converts images to FlatBuffers files
- `transformer` training model converter, which converts an inference MNN model into an MNN model that performs training
- `extractForInfer` extracts parameters from a training MNN model and writes them back to the corresponding inference MNN model
## Testing tools
- Related build options
- `MNN_BUILD_TOOL` whether to build the testing tools
35 changes: 35 additions & 0 deletions docs/inference/module.md
@@ -56,6 +56,41 @@ std::unique_ptr<Module> module; // module
module.reset(Module::load(input_names, output_names, model_filename.c_str(), rtMgr, &mdconfig));
```

### Module::Config
When creating a `Module`, you can pass in a `Module::Config`, structured as follows:

```cpp
struct Config {
// Load module as dynamic, default static
bool dynamic = false;

// for static mode: if the shape is mutable, set true; otherwise set false to avoid calling resizeSession frequently
bool shapeMutable = true;
// Pre-rearrange weights or not. Disabled by default.
// The weights will be rearranged in a general way, so the best implementation
// may not be adopted if `rearrange` is enabled.
bool rearrange = false;

BackendInfo* backend = nullptr;
};
```
#### dynamic
- Defaults to false: output variables are const, and only their data can be read
- If dynamic = true, the loaded model runs as a dynamic graph, which adds graph-construction overhead but preserves the compute path of the output variables so they can be saved as a model
- If dynamic = true, the shapeMutable / rearrange options below have no effect
#### shapeMutable
- Defaults to true, meaning the input shapes are liable to change; shape-related computation is deferred
- When set to false, memory is allocated in advance, and onForward copies the input data instead of using the pointers directly
#### rearrange
- If true, convolution operators are created ahead of time when the Module is created and their weights are rearranged, lowering runtime memory
- Currently supported only by the CPU and CUDA backends
#### backend
Deprecated; do not set this field
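
As a worked illustration of these options, here is a minimal sketch of loading a `Module` with an explicit `Config`; the model path and tensor names are placeholders, not from this diff:

```cpp
#include <memory>
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

int main() {
    Module::Config config;
    config.dynamic      = false; // static mode: outputs are const variables
    config.shapeMutable = false; // inputs keep a fixed shape, so resize work happens once
    config.rearrange    = true;  // pre-rearrange conv weights to lower runtime memory (CPU/CUDA only)

    // "model.mnn", "input" and "output" are hypothetical names for this sketch.
    std::unique_ptr<Module> module(
        Module::load({"input"}, {"output"}, "model.mnn", &config));
    if (module == nullptr) {
        return 1; // loading failed
    }
    // ... call module->onForward(...) as shown earlier in this document
    return 0;
}
```

With `dynamic = false` and `shapeMutable = false`, this matches the fast static path described above; flip `dynamic` to true only when you need to save the output variables' compute path.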
### Getting model information
Call the `getInfo` function to obtain the `Module` information; see the code in `tools/cpp/GetMNNInfo.cpp` and the [tool](../tools/test.html#getmnninfo)
```cpp
48 changes: 47 additions & 1 deletion docs/pymnn/expr.md
@@ -145,6 +145,52 @@ array([0., 1., 2., 3.], dtype=float32)
'Input'
```
---
### `set_lazy_mode(mode)`
Sets the lazy-computation mode; takes effect only when lazy evaluation is enabled.

- 0 : defer all computation
- 1 : run geometry computation immediately and defer content computation; suitable for building static models or for gradient computation during training

Defaults to 0.


Parameters:
- `x:int` the mode

Returns: `None`

Return type: `None`

Example:
```python
>>> expr.lazy_eval(True)
>>> expr.set_lazy_mode(0)
>>> y = expr.concat([x], -1)
>>> expr.save([y], "concat.mnn") # the saved model contains a concat op
>>> expr.set_lazy_mode(1)
>>> y = expr.concat([x], -1)
>>> expr.save([y], "concat_static.mnn") # the saved model contains a raster op
```

---
### `set_global_executor_config(backend, precision, threadnum)`
Sets the expr execution backend, precision, and thread count (mode for GPUs):

Parameters:
- `backend:int` e.g. 0->CPU 1->Metal 2->CUDA 3->OPENCL
- `precision:int` e.g. 0->Normal 1->High 2->Low
- `threadnum:int` thread count for CPU, mode for GPU

Returns: `None`

Return type: `None`

Example:

```python
>>> expr.set_global_executor_config(2, 2, 1)
```
---
### `sign(x)`
Returns the sign of the input value: 1 for positive numbers, -1 for negative numbers

@@ -3054,4 +3100,4 @@ dict_keys(['conv1', 'conv2_1/dw', 'conv2_1/sep', 'conv2_2/dw', 'conv2_2/sep', 'c
dict_keys(['data'])
>>> outputs.keys()
dict_keys(['prob'])
```
```
5 changes: 3 additions & 2 deletions docs/tools/test.md
@@ -87,11 +87,12 @@ Avg= 5.570600 ms, OpSum = 7.059200 ms min= 3.863000 ms, max= 11.596001 ms
- 16 : for GPU use; MNN prefers running on CPU while saving the GPU tuning info to the cache file, then enables the GPU once all ops are tuned
- 32 : sets rearrange to true, reducing memory usage after the model is loaded at the cost of longer load-time initialization
- 64 : after creating the model, clone a new one and run it, to verify the correctness of the clone feature (mainly used for concurrent inference)
- 128 : use input.mnn and output.mnn under the folder as the input and the reference output; recommended when the data volume is large


### Example
```bash
$ python ../tools/script/fastTestOnnx.py mobilenetv2-7.onnx
$ python ../tools/script/testMNNFromOnnx.py mobilenetv2-7.onnx
$ ./ModuleBasic.out mobilenetv2-7.mnn onnx 0 0 10
Test mobilenetv2-7.mnn from input info: onnx
input
@@ -114,7 +115,7 @@ Avg= 9.946699 ms, min= 9.472000 ms, max= 10.227000 ms
- `model:str` model file path
- `forwardType:int` compute device for inference; valid values: 0 (CPU), 1 (Metal), 2 (CUDA), 3 (OpenCL), 6 (OpenGL), 7 (Vulkan), 9 (TensorRT)
- `shapeMutable:int` whether the input shapes can change
- `dir_n:str` folder with input/output info, which can be generated by scripts such as fastTestOnnx.py / fastTestTf.py / fastTestTflite.py; see the correctness-verification part of the model conversion docs
- `dir_n:str` folder with input/output info, which can be generated by scripts such as testMNNFromOnnx.py; see the correctness-verification part of the model conversion docs
```bash
./SequenceModuleTest.out transformer.mnn 0 1 tr tr1 tr2 tr3 tr4 > error.txt
```
4 changes: 2 additions & 2 deletions express/Executor.cpp
@@ -145,6 +145,7 @@ std::shared_ptr<Executor> Executor::getGlobalExecutor() {
info.type = MNN_FORWARD_CPU;
info.numThread = 1;
std::shared_ptr<Runtime> bn(creator->onCreate(info));
bn->setAllocatorType(info.allocator);
gExecutor = new std::shared_ptr<Executor>(new Executor(bn, MNN_FORWARD_CPU, 1));
});
return *gExecutor;
@@ -668,10 +669,9 @@ std::shared_ptr<Executor::SubGraph> Executor::findSubGraph(const std::string& su
}
return iter->second;
}
void Executor::setLazyComputeMode(LazyMode mode) {
void Executor::setLazyComputeMode(uint32_t mode) {
mLazyMode = mode;
}


} // namespace Express
} // namespace MNN
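
Since `setLazyComputeMode` now takes a `uint32_t` bitmask rather than a single `LazyMode` value, callers can OR flags together. A hedged sketch using the `LAZY_CONTENT` and `LAZY_COMPUTE_ONCE` flags referenced elsewhere in this diff; whether combining them is useful for a given workload is an assumption to verify:

```cpp
#include <MNN/expr/Executor.hpp>
#include <MNN/expr/ExecutorScope.hpp>

using namespace MNN::Express;

void enableLazyFlags() {
    auto exe = ExecutorScope::Current();
    // Each flag occupies its own bit, so flags combine with bitwise OR;
    // checks such as (getLazyMode() & Executor::LAZY_CONTENT) test one flag.
    exe->setLazyComputeMode(Executor::LAZY_CONTENT | Executor::LAZY_COMPUTE_ONCE);
}
```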
56 changes: 30 additions & 26 deletions express/Expr.cpp
@@ -193,8 +193,11 @@ EXPRP Expr::create(std::shared_ptr<BufferStorage> extra, std::vector<VARP>&& inp
expr->mStorage = extra;
expr->mOp = flatbuffers::GetRoot<Op>(extra->buffer());
expr->mInputs = std::move(inputs);
expr->mInside->mReq = ExecutorScope::Current()->getRequirement(expr.get());
_addLinkForInputs(expr);
auto exe = ExecutorScope::Current();
expr->mInside->mReq = exe->getRequirement(expr.get());
if (!(exe->getLazyMode() & Executor::LAZY_COMPUTE_ONCE)) {
_addLinkForInputs(expr);
}
return expr;
}

@@ -350,7 +353,7 @@ VARP Variable::create(EXPRP expr, int index) {
}
// CONTENT Mode
do {
if (executor->getLazyMode() != Executor::LAZY_CONTENT) {
if (!(executor->getLazyMode() & Executor::LAZY_CONTENT)) {
break;
}
if (expr->get() == nullptr) {
@@ -1016,7 +1019,6 @@ blob->dataType = DataType_DT_##TYPE;

void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
auto executeOrder = getExecuteOrder(vars);

// Search subgraphs
std::map<std::string, std::shared_ptr<Executor::SubGraph>> subgraphs;
auto exe = ExecutorScope::Current();
@@ -1086,15 +1088,9 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
blob->dataFormat = (MNN_DATA_FORMAT)Utils::convertFormat(info.order);
blob->dims = info.dim;
if (info.type.code == halide_type_float) {
if (info.type.bits == 16) {
blob->dataType = DataType_DT_BFLOAT16;
blob->uint8s.resize(info.size * 2);
::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(int16_t));
} else {
blob->dataType = DataType_DT_FLOAT;
blob->float32s.resize(info.size);
::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
}
blob->dataType = DataType_DT_FLOAT;
blob->float32s.resize(info.size);
::memcpy(blob->float32s.data(), ptr, info.size * sizeof(float));
} else if (info.type.code == halide_type_int && info.type.bits == 32) {
blob->dataType = DataType_DT_INT32;
blob->int32s.resize(info.size);
@@ -1107,6 +1103,10 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
blob->dataType = DataType_DT_UINT8;
blob->uint8s.resize(info.size);
::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(uint8_t));
} else if (info.type.code == halide_type_bfloat && info.type.bits == 16) {
blob->dataType = DataType_DT_BFLOAT16;
blob->uint8s.resize(info.size * 2);
::memcpy(blob->uint8s.data(), ptr, info.size * sizeof(int16_t));
}
op->type = OpType_Const;
if (expr->mType == VARP::TRAINABLE) {
@@ -1163,12 +1163,14 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
dest->tensorName[subindex] = op->name + numberToString(v);
}
}
if (staticModel) {
auto tensor = expr->inside()->mOutputTensors[v];
auto tensor = expr->inside()->mOutputTensors[v];

if (staticModel || TensorUtils::getDescribe(tensor)->quantAttr) {
auto des = TensorUtils::getDescribe(tensor);
auto describe = std::unique_ptr<MNN::TensorDescribeT>(new MNN::TensorDescribeT);
describe->index = varIndexInfo[expr] + v;
describe->blob = std::unique_ptr<MNN::BlobT>(new MNN::BlobT);
describe->name = dest->tensorName[subindex];
auto& blob = describe->blob;
blob->dataFormat = des->dimensionFormat;
if (tensor->getType() == halide_type_of<float>()) {
Expand All @@ -1190,18 +1192,20 @@ void Variable::save(const std::vector<VARP>& vars, NetT* dest) {
describe->quantInfo->zero = tensorDes->quantAttr->zero;
describe->quantInfo->scale = tensorDes->quantAttr->scale;
}
for (auto& reg : des->regions) {
auto regionT = std::unique_ptr<MNN::RegionT>(new MNN::RegionT);
regionT->src = std::unique_ptr<MNN::ViewT>(new MNN::ViewT);
regionT->dst = std::unique_ptr<MNN::ViewT>(new MNN::ViewT);
regionT->src->offset = reg.src.offset;
regionT->dst->offset = reg.dst.offset;
for (int s = 0; s < 3; s++) {
regionT->src->stride.push_back(reg.src.stride[s]);
regionT->dst->stride.push_back(reg.dst.stride[s]);
regionT->size.push_back(reg.size[s]);
if (staticModel) {
for (auto& reg : des->regions) {
auto regionT = std::unique_ptr<MNN::RegionT>(new MNN::RegionT);
regionT->src = std::unique_ptr<MNN::ViewT>(new MNN::ViewT);
regionT->dst = std::unique_ptr<MNN::ViewT>(new MNN::ViewT);
regionT->src->offset = reg.src.offset;
regionT->dst->offset = reg.dst.offset;
for (int s = 0; s < 3; s++) {
regionT->src->stride.push_back(reg.src.stride[s]);
regionT->dst->stride.push_back(reg.dst.stride[s]);
regionT->size.push_back(reg.size[s]);
}
describe->regions.emplace_back(std::move(regionT));
}
describe->regions.emplace_back(std::move(regionT));
}
dest->extraTensorDescribe.emplace_back(std::move(describe));
}
1 change: 0 additions & 1 deletion express/NeuralNetWorkOp.cpp
@@ -1327,7 +1327,6 @@ VARP _Range(VARP start, VARP limit, VARP delta) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_Range;
auto rangeParam = new RangeT;
rangeParam->Tidx = (MNN::DataType)Utils::convertDataType(start->getInfo()->type);
op->main.type = OpParameter_Range;
op->main.value = rangeParam;
return Variable::create(Expr::create(std::move(op), {start, limit, delta}));
2 changes: 1 addition & 1 deletion express/Utils.cpp
@@ -81,7 +81,7 @@ halide_type_t Utils::revertDataType(DataType dataType) {
CONVERT(DataType_DT_UINT8, halide_type_of<uint8_t>(), dataType);
CONVERT(DataType_DT_INT8, halide_type_of<int8_t>(), dataType);
CONVERT(DataType_DT_HALF, halide_type_of<float>(), dataType);
CONVERT(DataType_DT_BFLOAT16, halide_type_t(halide_type_float, 16), dataType);
CONVERT(DataType_DT_BFLOAT16, halide_type_t(halide_type_bfloat, 16), dataType);
return halide_type_of<float>();
}
Express::Dimensionformat Utils::revertFormat(int format) {
2 changes: 1 addition & 1 deletion express/module/PipelineModule.cpp
@@ -518,7 +518,7 @@ static Module* _createSubModule(std::shared_ptr<BufferStorage> bufferStorage, co
scheduleInfo.defaultBackend = sharedConst->defaultBackend;
scheduleInfo.constReplaceBackend = sharedConst->constReplaceBackend;
scheduleInfo.allTensors = sharedConst->allTensors;
initTensors(scheduleInfo.allTensors, net);
scheduleInfo.validForResize = initTensors(scheduleInfo.allTensors, net);
std::vector<Schedule::OpCacheInfo> oplists;
std::vector<const Op*> ops;
ops.reserve(info.opList.size());
6 changes: 5 additions & 1 deletion express/module/StaticModule.cpp
@@ -367,7 +367,11 @@ std::vector<Express::VARP> StaticModule::onForward(const std::vector<Express::VA
if (mResource->mUseContentInputs) {
mSession->setNeedResize();
}
mSession->resize();
auto code = mSession->resize();
if (NO_ERROR != code) {
FUNC_PRINT(code);
return {};
}
} else {
// Resize
for (int i = 0; i < inputs.size(); ++i) {
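
Given that `onForward` now returns an empty vector when the internal resize fails, a caller-side sketch of the matching check; this helper is an illustration, not part of the diff:

```cpp
#include <vector>
#include <MNN/MNNDefine.h>
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

// Returns true when inference produced outputs; with this change, onForward
// yields an empty vector if Session::resize reports an error.
bool runOnce(Module* module, const std::vector<VARP>& inputs, std::vector<VARP>& outputs) {
    outputs = module->onForward(inputs);
    if (outputs.empty()) {
        MNN_ERROR("resize failed inside onForward\n");
        return false;
    }
    return true;
}
```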
5 changes: 3 additions & 2 deletions include/MNN/HalideRuntime.h
@@ -60,8 +60,9 @@ typedef enum halide_type_code_t
{
halide_type_int = 0, //!< signed integers
halide_type_uint = 1, //!< unsigned integers
halide_type_float = 2, //!< floating point numbers
halide_type_handle = 3 //!< opaque pointer type (void *)
halide_type_float = 2, //!< IEEE floating point numbers
halide_type_handle = 3, //!< opaque pointer type (void *)
halide_type_bfloat = 4 //!< floating point numbers in the bfloat format
} halide_type_code_t;

// Note that while __attribute__ can go before or after the declaration,
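
To illustrate the new type code, a small sketch constructing the bfloat16 descriptor that `Utils::revertDataType` now returns for `DataType_DT_BFLOAT16`; standalone usage here is illustrative, assuming the C++ helpers in MNN's `HalideRuntime.h`:

```cpp
#include <cstdio>
#include <MNN/HalideRuntime.h>

int main() {
    // 16-bit value tagged with the new bfloat type code; distinct from
    // halide_type_t(halide_type_float, 16), which denotes IEEE half precision.
    halide_type_t bf16(halide_type_bfloat, 16);
    // Both 16-bit formats occupy two bytes; only the type code differs.
    std::printf("code=%d bits=%d bytes=%d\n", (int)bf16.code, (int)bf16.bits, bf16.bytes());
    return 0;
}
```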
