Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified benchmark/models/MobileNetV2_224.mnn
Binary file not shown.
Binary file modified benchmark/models/SqueezeNetV1.0.mnn
Binary file not shown.
Binary file modified benchmark/models/inception-v3.mnn
Binary file not shown.
Binary file modified benchmark/models/mobilenet-v1-1.0.mnn
Binary file not shown.
Binary file modified benchmark/models/mobilenetV3.mnn
Binary file not shown.
Binary file modified benchmark/models/nasnet.mnn
Binary file not shown.
Binary file modified benchmark/models/resnet-v2-50.mnn
Binary file not shown.
Binary file modified benchmark/models/squeezenetv1.1.mnn
Binary file not shown.
1 change: 1 addition & 0 deletions docs/transformers/llm.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ node llm_demo.js ~/qwen2.0_1.5b/config.json ~/qwen2.0_1.5b/prompt.txt
- bigvgan_model: 当使用Omni模型时,bigvgan_model的实际路径为`base_dir + bigvgan_model`,默认为`base_dir + 'bigvgan.mnn'`
- spk_dict: 当使用Omni模型时,spk_dict的实际路径为`base_dir + spk_dict`,默认为`base_dir + 'spk_dict.txt'`
- 推理配置
- use_tmac: 基于TMac方案进行LLM模型推理,默认为`false`
- max_new_tokens: 生成时最大token数,默认为`512`
- reuse_kv: 多轮对话时是否复用之前对话的`kv cache`,默认为`false`.
- quant_qkv: CPU attention 算子中`query, key, value`是否量化,可选为:`0, 1, 2, 3, 4`,默认为`0`,含义如下:
Expand Down
26 changes: 13 additions & 13 deletions include/MNN/Interpreter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class MNN_PUBLIC Interpreter {
*/
static Interpreter* createFromBuffer(const void* buffer, size_t size);
~Interpreter();

/**
* @brief destroy Interpreter
* @param model given Interpreter to release.
Expand Down Expand Up @@ -153,18 +153,18 @@ class MNN_PUBLIC Interpreter {
Session_Backend_Auto = 9, // Auto Determine the Op type by MNN

/** Determine whether static memory is recycled in resizeSession or just cached */
Session_Memory_Collect = 10, // Recycle static memory when session resize in case memory explosion
Session_Memory_Collect = 10, // Recycle static memory when session resize in case memory explosion
Session_Memory_Cache = 11, // Cache the static memory for next forward usage

/** Determine whether use codegen function */
Session_Codegen_Disable = 12, // Disable codegen in case extra build codegen cost
Session_Codegen_Enable = 13, // Enable codegen

/** Dynamic Resize Optimization */
Session_Resize_Check = 14, // Open Trace for resize
Session_Resize_Fix = 15, // Apply Resize Optimization
/** Set for Module's traceOrOptimize API.

/** Set for Module's traceOrOptimize API.
Module_Forward_Seperate:
when inputs are not empty, Module's onForward will only infer shape and alloc memory.
when inputs are empty, Module's onForward will only runSession to compute content.
Expand Down Expand Up @@ -199,7 +199,7 @@ class MNN_PUBLIC Interpreter {
* If resize session generate new cache info, try to rewrite cache file.
* If resize session do not generate any new cache info, just do nothing.
* @param session given session
* @param flag Protected param, not used now
* @param flag Protected param, not used now
*/

ErrorCode updateCacheFile(Session *session, int flag = 0);
Expand Down Expand Up @@ -247,7 +247,7 @@ class MNN_PUBLIC Interpreter {
// mmap allocate file size, KB
MMAP_FILE_SIZE = 11,
USE_CACHED_MMAP = 12,

// Multi-Thread Load module, default is 0 (don't use other Thread)
INIT_THREAD_NUMBER = 13,

Expand All @@ -270,7 +270,7 @@ class MNN_PUBLIC Interpreter {
enum ExternalPathType {
// Path of the kvcache directory
EXTERNAL_PATH_KVCACHE_DIR = 0,

// Mid Buffer Cache File
EXTERNAL_FEATUREMAP_DIR = 1,

Expand All @@ -282,7 +282,7 @@ class MNN_PUBLIC Interpreter {

// Path of the prefix cache directory
EXTERNAL_PATH_PREFIXCACHE_DIR = 4,

// Other types ...
};

Expand All @@ -295,10 +295,10 @@ class MNN_PUBLIC Interpreter {

// Use loop instead of raster + compute if possible
GEOMETRCOMPUTEMASK_USELOOP = 1 << 2,

// Support Geometry Cache, if shape changed, will try recompute, and then run compute if failed
GEOMETRCOMPUTEMASK_OPENCACHE = 1 << 3,

// Full option open mask, for example, if want to close useloop, can set mask as (GEOMETRCOMPUTEMASK_ALL - GEOMETRCOMPUTEMASK_USELOOP)
GEOMETRCOMPUTEMASK_ALL = 0xFFFF,
};
Expand Down Expand Up @@ -369,7 +369,7 @@ class MNN_PUBLIC Interpreter {
*/
void resizeSession(Session* session, int needRelloc);


/**
* @brief call this function if you no longer need to resize or create sessions; it will save memory equal
* to the size of the model buffer
Expand Down Expand Up @@ -459,7 +459,7 @@ class MNN_PUBLIC Interpreter {
RuntimeManager::getInfo: 0: no resize, 1: re-malloc, 2: resize
*/
RESIZE_STATUS = 3,

/** Mode / NumberThread, int* */
THREAD_NUMBER = 4,

Expand Down
203 changes: 193 additions & 10 deletions project/android/build.gradle
Original file line number Diff line number Diff line change
@@ -1,34 +1,154 @@
/**
 * Interprets a Gradle property value as a boolean switch.
 *
 * Boolean values are returned unchanged. Any other value is converted to a
 * string and compared case-insensitively against 'true', 'on', 'yes' and '1'.
 * Everything else, including null and blank values, yields false.
 *
 * @param value raw property value (Boolean, String, or any object with toString())
 * @return true when the value denotes an enabled option, false otherwise
 */
def getBooleanProperty(value) {
    if (value instanceof Boolean) {
        return value
    }
    // A property can be declared without a usable value; treat that as "off"
    // instead of failing on value.toString().
    if (value == null) {
        return false
    }
    // Trim so whitespace-padded values such as "ON " still match, and lower-case
    // with Locale.ROOT so the comparison does not depend on the host locale.
    String normalized = value.toString().trim().toLowerCase(Locale.ROOT)
    return normalized in ['true', 'on', 'yes', '1']
}

// Extra properties for this build script: publishing helpers and native build switches.
ext {
// True when the build was started with an 'MTLDeployVersion' project property.
MTLPublishing = project.hasProperty('MTLDeployVersion')
// Returns MTLDeployVersion when MTLPublishing is set, otherwise the supplied default.
// Any exception raised during the lookup is swallowed and the default is returned.
update2MTLVersionFrom = { defVal ->
try{
if (MTLPublishing){
println "get mtl deploy version for ${getName()}:$MTLDeployVersion"
return MTLDeployVersion
}
}catch (Exception ignored){}
return defVal
}
// Initialised to an empty string here.
artifactName = ''

// Build option controls
// Each flag is false when the matching project property is absent; otherwise the
// property value is parsed by getBooleanProperty.
BUILD_LLM = project.hasProperty('BUILD_LLM') ? getBooleanProperty(project.BUILD_LLM) : false
BUILD_LLM_VISION = project.hasProperty('BUILD_LLM_VISION') ? getBooleanProperty(project.BUILD_LLM_VISION) : false
BUILD_LLM_DEMO = project.hasProperty('BUILD_LLM_DEMO') ? getBooleanProperty(project.BUILD_LLM_DEMO) : false
BUILD_QNN = project.hasProperty('BUILD_QNN') ? getBooleanProperty(project.BUILD_QNN) : false
}

// Print the resolved build options
println "BUILD_LLM: $BUILD_LLM"
println "BUILD_LLM_VISION: $BUILD_LLM_VISION"
println "BUILD_LLM_DEMO: $BUILD_LLM_DEMO"
println "BUILD_QNN: $BUILD_QNN"


buildscript {
repositories {
google()
jcenter()
mavenCentral()
mavenLocal()
maven{
url "http://mvnrepo.alibaba-inc.com/mvn/repository"
}
}
dependencies {
classpath 'com.android.tools.build:gradle:3.2.1'
classpath 'com.android.tools.build:gradle:4.2.2'
}
}

// Repositories declared for this project, in declaration order: the local Maven
// repository, Google's repository, and the mvnrepo.alibaba-inc.com repository.
repositories {
mavenLocal()
google()
maven{
// NOTE(review): this repository is addressed over plain http.
url "http://mvnrepo.alibaba-inc.com/mvn/repository"
}
}

apply plugin: 'com.android.library'
apply plugin: 'maven'
apply plugin: 'maven-publish'

group = 'com.taobao.android'
artifactName = 'alinn'
version = update2MTLVersionFrom('2.0.9.9-android-SNAPSHOT')
description = 'MNN Framework'


android {
compileSdkVersion 28
compileSdkVersion 34
ndkVersion "27.2.12479018"

archivesBaseName = artifactName

defaultConfig {
minSdkVersion 14
targetSdkVersion 28
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
minSdkVersion 21
targetSdkVersion 34
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
versionCode 1
versionName version
externalNativeBuild {
cmake {
arguments "-DANDROID_ARM_NEON=TRUE", "-DANDROID_PLATFORM=android-21", "-DMNN_OPENCL=true", "-DANDROID_STL=c++_shared"
def cmakeArgs = [
"-DANDROID_ARM_NEON=TRUE",
"-DANDROID_PLATFORM=android-21",
"-DMNN_ARM82=true",
"-DMNN_OPENCL=true",
"-DMNN_VULKAN=false",
"-DMNN_WITH_PLUGIN=true",
"-DMNN_INTERNAL=false",
"-DANDROID_STL=c++_shared",
"-DMNN_BUILD_OPENCV=true",
"-DMNN_IMGPROC_COLOR=true",
"-DMNN_IMGPROC_GEOMETRIC=true",
"-DMNN_IMGPROC_DRAW=true",
"-DMNN_IMGPROC_FILTER=true",
"-DMNN_IMGPROC_MISCELLANEOUS=true",
"-DMNN_IMGPROC_STRUCTRAL=true",
"-DMNN_CALIB3D=true",
"-DMNN_IMGPROC_HISTOGRAMS=true",
"-DMNN_SUPPORT_DEPRECATED_OP=false",
"-DMNN_LOW_MEMORY=true",
"-DANDROID_SUPPORT_FLEXIBLE_PAGE_SIZES=ON",
"-DMNN_SME2=OFF"
]

// Conditionally add LLM options
if (BUILD_LLM) {
cmakeArgs.addAll([
"-DMNN_BUILD_LLM=true",
"-DMNN_SUPPORT_TRANSFORMER_FUSE=true",
"-DMNN_LLM_BUILD_DEMO=${BUILD_LLM_DEMO}",
"-DLLM_SUPPORT_HTTP_RESOURCE=false"
])
}

// Disable KleidiAI for Android build to avoid SVE2 issues
cmakeArgs.add("-DMNN_KLEIDIAI=false")

// Conditionally add LLM Vision options
if (BUILD_LLM_VISION) {
cmakeArgs.addAll([
"-DLLM_SUPPORT_VISION=true",
"-DMNN_IMGCODECS=true"
])
}

// Conditionally add QNN options
if (BUILD_QNN) {
cmakeArgs.addAll([
"-DMNN_QNN=ON",
"-DMNN_QNN_ONLINE_FINALIZE=OFF",
"-DMNN_WITH_PLUGIN=ON"
])

// Pass QNN SDK root to CMake via property or environment, avoiding reliance on Gradle daemon env
def qnnRoot = null
if (project.hasProperty('QNN_SDK_ROOT')) {
qnnRoot = project.property('QNN_SDK_ROOT')
} else if (System.getenv('QNN_SDK_ROOT') != null) {
qnnRoot = System.getenv('QNN_SDK_ROOT')
}
if (qnnRoot != null && qnnRoot.trim()) {
cmakeArgs.add("-DQNN_SDK_ROOT=${qnnRoot}")
println "Using QNN_SDK_ROOT from Gradle/CMake: ${qnnRoot}"
} else {
println "Warning: QNN_SDK_ROOT is not provided; CMake will fallback to ENV variable if available."
}
}
arguments(*cmakeArgs)
// arguments "-DANDROID_ARM_NEON=TRUE", "-DANDROID_PLATFORM=android-21", "-DMNN_ARM82=true", "-DMNN_OPENCL=true", "-DMNN_VULKAN=false", "-DMNN_WITH_PLUGIN=true", "-DMNN_INTERNAL=false", "-DANDROID_STL=c++_shared", "-DMNN_BUILD_OPENCV=true", "-DMNN_IMGPROC_COLOR=true", "-DMNN_IMGPROC_GEOMETRIC=true", "-DMNN_IMGPROC_DRAW=true", "-DMNN_IMGPROC_FILTER=true", "-DMNN_IMGPROC_MISCELLANEOUS=true", "-DMNN_IMGPROC_STRUCTRAL=true", "-DMNN_CALIB3D=true", "-DMNN_IMGPROC_HISTOGRAMS=true", "-DMNN_SUPPORT_DEPRECATED_OP=false", "-DMNN_LOW_MEMORY=true"
abiFilters 'armeabi-v7a', 'arm64-v8a'
}
}
Expand All @@ -41,8 +161,71 @@ android {
}

apply from: "nativepub.gradle"
apply from: "qnnprepare.gradle"

HashMap getAccount(){
HashMap accountMap = new HashMap()
def parsedSettingsXml
def settingsFile = '/home/admin/software/apache-maven-3.2.1/conf/settings.xml'
def defaultSettingsFile = System.getProperty("user.home") + "/.m2/settings.xml"
println("defaultSettingsFile: " + defaultSettingsFile)
if(file(settingsFile).exists()||file(defaultSettingsFile).exists()){
if(file(settingsFile).exists()){
parsedSettingsXml = (new groovy.util.XmlParser()).parse(settingsFile)
}else if(file(defaultSettingsFile).exists()){
parsedSettingsXml = (new groovy.util.XmlParser()).parse(defaultSettingsFile)
}

parsedSettingsXml.servers[0].server.each{ server ->
if("releases" == server.id.text()){
accountMap.put("id",server.id.text())
accountMap.put("username",server.username.text())
accountMap.put("password",server.password.text())

}

}

}else{
accountMap.put("id","releases")
accountMap.put("username","admin")
accountMap.put("password","screct")
}
return accountMap

task wrapper(type: Wrapper) {
gradleVersion = '4.1'
distributionUrl = 'http://mirrors.taobao.net/mirror/gradle/gradle-4.1-bin.zip'
}

// Maven publishing setup: one publication for the release AAR, with the target
// repository chosen by whether the project version ends in "-SNAPSHOT".
publishing {
publications {
maven(MavenPublication) {
artifactId project.artifactName
// The artifact is referenced by path under buildDir/outputs/aar.
artifact "${project.buildDir}/outputs/aar/${artifactName}-release.aar"
pom.packaging "aar"
}
}

// Snapshot versions go to the snapshots repository; all other versions go to
// the releases repository.
if(version.endsWith("-SNAPSHOT")){
repositories{
mavenLocal()
maven{
url "http://mvnrepo.alibaba-inc.com/nexus/content/repositories/snapshots"
// NOTE(review): credentials are written inline in the build script; consider
// supplying them through Gradle properties or the environment instead.
credentials {
username = "snapshotsAdmin"
password = "123456"
}
}
}
} else {
// Release credentials come from the map returned by getAccount().
def accountMap = getAccount()
repositories {
mavenLocal()
maven {
url "http://mvnrepo.alibaba-inc.com/nexus/content/repositories/releases"
credentials {
username = accountMap.get("username")
password = accountMap.get("password")
}
}
}
}
}
Loading
Loading