README.md (0 additions, 2 deletions)
@@ -11,8 +11,6 @@
 ## News 🔥
-- [2025/08/08] Now we support [gpt-oss-20b](./apps/Android/MnnLlmChat/README.md#releases).
-- [2025/08/05] MNN Chat Android is available on [GooglePlay](https://play.google.com/store/apps/details?id=com.alibaba.mnnllm.android.release)!
 - [2025/06/11] New app MNN TaoAvatar released: talk with a 3D avatar offline, with LLM, ASR, TTS, A2BS and NNR models all running locally on your device! [MNN TaoAvatar](./apps/Android/Mnn3dAvatar/README.md)
docs/transformers/llm.md (13 additions, 0 deletions)
@@ -106,17 +106,23 @@ optional arguments:
                         mnn quant bit, 4 or 8, default is 4.
   --quant_block QUANT_BLOCK
                         mnn quant block, 0 means channel-wise, default is 128.
+  --visual_quant_bit VISUAL_QUANT_BIT
+                        mnn visual model quant bit, 4 or 8, default is set in utils/vision.py per ViT model.
+  --visual_quant_block VISUAL_QUANT_BLOCK
+                        mnn visual model quant block, 0 means channel-wise, default is set in utils/vision.py per ViT model.
   --lm_quant_bit LM_QUANT_BIT
                         mnn lm_head quant bit, 4 or 8, default is `quant_bit`.
   --mnnconvert MNNCONVERT
                         local mnnconvert path; if invalid, pymnn is used.
   --ppl                 Whether or not to get all logits of input tokens.
   --awq                 Whether or not to use awq quant.
   --sym                 Whether or not to use symmetric quant (without zeropoint), default is False.
+  --visual_sym          Whether or not to use symmetric quant (without zeropoint) for the visual model, default is False.
   --seperate_embed      For an lm/embed shared model, whether or not to separate embed to avoid quant, default is False; if True, the embed weight is written separately to embeddingbf16.bin.
   --lora_split          Whether or not to export lora split, default is False.
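For context, a minimal sketch of how the newly documented visual-quantization flags might be combined with the existing export options. The model path and name are placeholders, and the `--path`/`--export` options are assumed from the existing llmexport usage; the `--visual_*` flags behave as described in the help text above.

```bash
# Hypothetical export of a vision-language model (path is a placeholder):
# language-model weights quantized to 4 bit with block size 128,
# visual (ViT) weights quantized to 8 bit, channel-wise (block 0), symmetric.
python llmexport.py \
    --path ./Qwen2-VL-2B-Instruct \
    --export mnn \
    --quant_bit 4 \
    --quant_block 128 \
    --visual_quant_bit 8 \
    --visual_quant_block 0 \
    --visual_sym
```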