
Commit ebb8c8f

Merge pull request #3505 from wangzhaode/feature/sync_314
[MNN:Sync] Sync Internal 3.1.4.
2 parents 67709fd + a019d97 commit ebb8c8f

94 files changed: +17997, -10705 lines


backupcode/cpubackend/compute/GemmInt8Executor.cpp

Lines changed: 3 additions & 4 deletions
@@ -36,12 +36,11 @@ static void _makeResource(Backend* backend, std::shared_ptr<CPUConvolution::Reso
     auto alphaPtr = resource->mDequantize.mScaleBias->host<float>();
     auto biasPtr = reinterpret_cast<float*>(reinterpret_cast<uint8_t*>(alphaPtr) + ocUp4 * core->bytes);
     ::memset(alphaPtr, 0, 2 * ocUp4 * core->bytes);
-    auto wZero = resourceInt8->mWeightQuantZero->host<int32_t>(); // has packed to outputUp4
     auto wScale = resourceInt8->mOriginScale->host<float>();
     int h = ocUp4;
     for (int i=0; i< h; ++i) {
         alphaPtr[i] = wScale[i];
-        biasPtr[i] = (-1.f) * wZero[i] * wScale[i];
+        biasPtr[i] = wScale[i + ocUp4];
     }
 }

@@ -185,8 +184,8 @@ ErrorCode GemmInt8Executor::onExecute(const std::vector<Tensor *> &inputs, const
 
     quanParam.useInt8 = 0; // Save result as float data type.
     quanParam.biasFloat = reinterpret_cast<float*>(mQuantBias.data());
-    quanParam.weightQuanBias = mKernelSum.data();
-    quanParam.extraScale = nullptr;
+    quanParam.weightKernelSum = mKernelSum.data();
+    quanParam.inputScale = nullptr;
     float dequantScale = mMutableResource.mResource->mInputScale;
 
     SumByAxisParams sumParams;
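For context on the hunk above: with asymmetric int8 weight quantization the dequantized weight is `scale * (q - zero) = scale * q - zero * scale`. The old code derived the `-zero * scale` bias term from `mWeightQuantZero` on the fly; after this sync that product is expected to arrive pre-packed in the second half of `mOriginScale` (at offset `ocUp4`), which is what `wScale[i + ocUp4]` reads. A minimal numpy sketch of the equivalence (names are illustrative, not the MNN API):

```python
import numpy as np

# Hypothetical per-output-channel quantization parameters (illustrative only).
scale = np.array([0.02, 0.05], dtype=np.float32)        # wScale[i]
zero  = np.array([3.0, -7.0], dtype=np.float32)         # wZero[i]
q     = np.array([[10, -4], [5, 8]], dtype=np.float32)  # quantized weights, one row per channel

# Old path: bias computed on the fly from the zero points.
bias_old = -1.0 * zero * scale

# New path: the same product is pre-packed after the scales,
# so that packed[i + ocUp4] == -zero[i] * scale[i].
ocUp4 = len(scale)
packed = np.concatenate([scale, -zero * scale])
bias_new = packed[ocUp4:]

assert np.allclose(bias_old, bias_new)

# Either way, dequantize(q) == scale * q + bias, per output channel.
dequant = scale[:, None] * q + bias_new[:, None]
print(dequant)
```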

docs/transformers/llm.md

Lines changed: 142 additions & 4 deletions
@@ -54,7 +54,7 @@ python llmexport.py \
 ├── llm.mnn.weight
 ├── onnx/
 ├──llm.onnx
-├──llm.onnx.data
+├──llm.onnx.data
 ├── llm_config.json
 └── tokenizer.txt
 ```
@@ -123,7 +123,7 @@ mnnconvert -f MNN --modelFile model/llm.mnn --JsonFile model/llm.mnn.json
 Read the weights with safetensors2mnn.py:
 
 ```
-python3 safetensors2mnn.py --path /Users/xtjiang/.cache/modelscope/hub/Qwen/Qwen2___5-0___5B-Instruct --mnn_dir model
+python3 safetensors2mnn.py --path /Users/xtjiang/.cache/modelscope/hub/Qwen/Qwen2___5-0___5B-Instruct --mnn_dir model
 ```
 
 safetensors2mnn.py supports the same quantization options as llmexport.py.
@@ -237,7 +237,16 @@ node llm_demo.js ~/qwen2.0_1.5b/config.json ~/qwen2.0_1.5b/prompt.txt
   - embedding_model: when the embedding uses a model, the actual embedding path is `base_dir + embedding_model`; defaults to `base_dir + 'embedding.mnn'`
   - embedding_file: when the embedding uses a binary file, the actual embedding path is `base_dir + embedding_file`; defaults to `base_dir + 'embeddings_bf16.bin'`
   - tokenizer_file: the actual path of `tokenizer.txt` is `base_dir + tokenizer_file`; defaults to `base_dir + 'tokenizer.txt'`
-  - visual_model: when using a VL model, the actual path of visual_model is `base_dir + visual_model`; defaults to `base_dir + 'visual.mnn'`
+  - visual_model: when using a VL model, the actual path of visual_model is `base_dir + visual_model`; defaults to `base_dir + 'visual.mnn'`
+  - audio_model: when using an Audio model, the actual path of audio_model is `base_dir + audio_model`; defaults to `base_dir + 'audio.mnn'`
+- Omni model files
+  - talker_model: when using an Omni model, the actual path of talker_model is `base_dir + talker_model`; defaults to `base_dir + 'talker.mnn'`
+  - talker_weight: when using an Omni model, the actual path of talker_weight is `base_dir + talker_weight`; defaults to `base_dir + 'talker.mnn.weight'`
+  - talker_embedding_file: when using an Omni model, the actual path of talker_embedding_file is `base_dir + talker_embedding_file`; defaults to `base_dir + 'talker_embeddings_bf16.bin'`
+  - predit_model: when using an Omni model, the actual path of predit_model is `base_dir + predit_model`; defaults to `base_dir + 'predit.mnn'`
+  - dit_model: when using an Omni model, the actual path of dit_model is `base_dir + dit_model`; defaults to `base_dir + 'dit.mnn'`
+  - bigvgan_model: when using an Omni model, the actual path of bigvgan_model is `base_dir + bigvgan_model`; defaults to `base_dir + 'bigvgan.mnn'`
+  - spk_dict: when using an Omni model, the actual path of spk_dict is `base_dir + spk_dict`; defaults to `base_dir + 'spk_dict.txt'`
 - Inference settings
   - max_new_tokens: maximum number of tokens to generate; defaults to `512`
   - reuse_kv: whether to reuse the `kv cache` from previous turns in multi-turn dialogue; defaults to `false`.
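To make the Omni model-file options above concrete, here is a hypothetical `config.json` fragment that simply spells out the documented defaults (each value equals its default, so every key could be omitted; paths resolve relative to `base_dir`):

```json
{
    "talker_model": "talker.mnn",
    "talker_weight": "talker.mnn.weight",
    "talker_embedding_file": "talker_embeddings_bf16.bin",
    "predit_model": "predit.mnn",
    "dit_model": "dit.mnn",
    "bigvgan_model": "bigvgan.mnn",
    "spk_dict": "spk_dict.txt"
}
```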
@@ -265,10 +274,15 @@ node llm_demo.js ~/qwen2.0_1.5b/config.json ~/qwen2.0_1.5b/prompt.txt
   - minP: the min-P value for `minP`; defaults to 0.1
   - tfsZ: the Z value for `tfs`; defaults to 1.0 (i.e. tfs is disabled)
   - typical: the p value for `typical`; defaults to 1.0 (i.e. typical is disabled)
-  - penalty: the penalty applied in `penalty` to repeated tokens in the logits; defaults to 0.0 (no penalty)
+  - penalty: the penalty applied in `penalty` to repeated tokens in the logits; defaults to 0.0 (no penalty); recommended range 1.05~1.5.
   - n_gram: maximum n-gram size to track; repeated n-grams longer than this are banned from repeating in the output; only takes effect when `penalty` is selected; defaults to 8
   - ngram_factor: extra penalty applied in `penalty` to repeated n-grams (n > 1); defaults to 1.0, i.e. no extra penalty
   - penalty_sampler: sampling strategy used in `penalty` after the penalty has been applied, either "greedy" or "temperature"; defaults to greedy.
+- Omni speech generation settings
+  - talker_max_new_tokens: maximum number of speech tokens to generate; in Qwen2.5-Omni, 50 speech tokens correspond to 1 second of audio; defaults to `2048`
+  - talker_speaker: voice of the generated speech; Qwen2.5-Omni supports `["Chelsie", "Ethan"]`
+  - dit_steps: number of diffusion-model iterations for speech generation; defaults to `5`; `5~10` is recommended, higher values give better audio quality at higher compute cost
+  - dit_solver: order of the diffusion solver, `1` or `4`; defaults to `1` (first-order Euler); `4` selects fourth-order Runge-Kutta, slightly better quality at roughly 4x the cost
 
 ##### Example configuration files
 - `config.json`
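Likewise, a hypothetical `config.json` fragment for the Omni speech-generation options above, using the documented defaults except `dit_steps`, which is raised to the upper end of the recommended 5~10 range for higher audio quality:

```json
{
    "talker_max_new_tokens": 2048,
    "talker_speaker": "Chelsie",
    "dit_steps": 10,
    "dit_solver": 1
}
```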
@@ -471,3 +485,127 @@ python llmexport.py --path /path/to/Qwen2.5-0.5B-Instruct --lora_path /path/to/l
     thread2.join();
 }
 ```
+
+#### Getting speech output
+When using an Omni model, the `setWavformCallback` interface can be used to obtain the speech output. Examples:
+1. Save the audio to a file
+```cpp
+#include <audio/audio.hpp>
+int main() {
+    // save waveform to file for debugging
+    std::vector<float> waveform;
+    llm->setWavformCallback([&](const float* ptr, size_t size, bool last_chunk) {
+        waveform.reserve(waveform.size() + size);
+        waveform.insert(waveform.end(), ptr, ptr + size);
+        if (last_chunk) {
+            auto waveform_var = MNN::Express::_Const(waveform.data(), {(int)waveform.size()}, MNN::Express::NCHW, halide_type_of<float>());
+            MNN::AUDIO::save("output.wav", waveform_var, 24000);
+            waveform.clear();
+        }
+        return true;
+    });
+    return 0;
+}
+
+```
+2. Stream the audio playback (Mac/iOS example)
+```cpp
+#include <thread>
+#include <AudioToolbox/AudioToolbox.h>
+
+struct AudioPlayer {
+    AudioStreamBasicDescription format;
+    std::vector<float> audioBuffer;
+    std::mutex bufferMutex;
+    std::condition_variable bufferCondVar;
+    bool doneGenerating = false;
+    std::thread playThread;
+    AudioPlayer() {
+        format.mSampleRate = 24000;
+        format.mFormatID = kAudioFormatLinearPCM;
+        format.mFormatFlags = kLinearPCMFormatFlagIsFloat;
+        format.mBytesPerPacket = sizeof(float);
+        format.mFramesPerPacket = 1;
+        format.mBytesPerFrame = sizeof(float);
+        format.mChannelsPerFrame = 1;
+        format.mBitsPerChannel = sizeof(float) * 8;
+    }
+    bool play(const float* ptr, size_t size, bool last_chunk);
+};
+
+void AudioQueueCallback(void* userData, AudioQueueRef inAQ, AudioQueueBufferRef inBuffer) {
+    AudioPlayer* context = static_cast<AudioPlayer*>(userData);
+    std::unique_lock<std::mutex> lock(context->bufferMutex);
+    int samplesToCopy = inBuffer->mAudioDataBytesCapacity / sizeof(float);
+    while (context->audioBuffer.size() < samplesToCopy) {
+        if (context->doneGenerating) { break; }
+        context->bufferCondVar.wait(lock);
+    }
+    if (context->audioBuffer.size() < samplesToCopy) {
+        samplesToCopy = context->audioBuffer.size();
+    }
+    memcpy(inBuffer->mAudioData, context->audioBuffer.data(), samplesToCopy * sizeof(float));
+    context->audioBuffer.erase(context->audioBuffer.begin(), context->audioBuffer.begin() + samplesToCopy);
+    inBuffer->mAudioDataByteSize = samplesToCopy * sizeof(float);
+    AudioQueueEnqueueBuffer(inAQ, inBuffer, 0, nullptr);
+}
+
+void playAudioData(AudioPlayer* context) {
+    AudioQueueRef queue;
+    AudioQueueNewOutput(&context->format, AudioQueueCallback, context, nullptr, nullptr, 0, &queue);
+    AudioQueueBufferRef buffers[3];
+    UInt32 bufferSize = 1024 * sizeof(float);
+    for (int i = 0; i < 3; ++i) {
+        AudioQueueAllocateBuffer(queue, bufferSize, &buffers[i]);
+        AudioQueueCallback(context, queue, buffers[i]);
+    }
+    AudioQueueStart(queue, nullptr);
+    while (true) {
+        {
+            std::lock_guard<std::mutex> lock(context->bufferMutex);
+            if (context->doneGenerating && context->audioBuffer.empty())
+                break;
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+    AudioQueueStop(queue, true);
+    for (int i = 0; i < 3; ++i) {
+        AudioQueueFreeBuffer(queue, buffers[i]);
+    }
+    AudioQueueDispose(queue, true);
+}
+
+bool AudioPlayer::play(const float* ptr, size_t size, bool last_chunk) {
+    {
+        std::lock_guard<std::mutex> lock(bufferMutex);
+        audioBuffer.reserve(audioBuffer.size() + size);
+        audioBuffer.insert(audioBuffer.end(), ptr, ptr + size);
+    }
+    if (playThread.joinable()) {
+        bufferCondVar.notify_all();
+    } else {
+        playThread = std::thread(playAudioData, this);
+        printf(">>>>>>>> PLAY START\n");
+    }
+    if (last_chunk) {
+        doneGenerating = true;
+        bufferCondVar.notify_all();
+        if (playThread.joinable()) {
+            playThread.join();
+            printf(">>>>>>>> PLAY END\n");
+        }
+        return false;
+    }
+    return true;
+}
+
+int main() {
+    //....
+    AudioPlayer audio_player;
+    llm->setWavformCallback([&](const float* ptr, size_t size, bool last_chunk) {
+        return audio_player.play(ptr, size, last_chunk);
+    });
+    //....
+    return 0;
+}
+```

include/MNN/MNNDefine.h

Lines changed: 1 addition & 1 deletion
@@ -76,6 +76,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
 #define STR(x) STR_IMP(x)
 #define MNN_VERSION_MAJOR 3
 #define MNN_VERSION_MINOR 1
-#define MNN_VERSION_PATCH 3
+#define MNN_VERSION_PATCH 4
 #define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
 #endif /* MNNDefine_h */

include/MNN/expr/Executor.hpp

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@
 #include <mutex>
 #include <set>
 #include <MNN/MNNForwardType.h>
+
 namespace MNN {
 class Backend;
 class Execution;

pymnn/pip_package/MNN/expr/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ def _to_var(x, dtype=None):
     except:
         pass
     # 3. Sequence
-    if isinstance(x, _Sequence) and x:
+    if isinstance(x, _Sequence):
         dst_shape, item_type = _list_shape_type(x)
         x = _F.const(x, dst_shape, dtype=item_type)
     # 4. assert
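With the `and x` guard removed, an empty sequence no longer falls through `_to_var` unconverted: it now reaches `_list_shape_type` and `_F.const` like any other list, which is exactly what the new empty-array case in pymnn/test/unit_test.py below exercises. A small usage sketch (assuming pymnn's numpy-like `MNN.numpy` module):

```python
import MNN.numpy as mp
import numpy as np

# Before this fix an empty list was skipped by the falsy-sequence check;
# now it converts the same way numpy does:
e = mp.array([])
e_ = np.array([])
print(e.shape, e_.shape)  # both report a zero-sized array
```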

pymnn/test/model_test.py

Lines changed: 7 additions & 3 deletions
@@ -137,9 +137,13 @@ def modelTest(modelPath, givenName, expectName):
     outputHost = createTensor(outputTensor)
     outputTensor.copyToHostTensor(outputHost)
     # compare
-    success = compareTensor(outputHost, expectName)
-    log_result(success, modelPath)
-
+    if "mobilenetv1quan" in modelPath or "overflowaware" in modelPath:
+        success = compareTensor(outputHost, expectName, 0.1)
+        log_result(success, modelPath)
+    else:
+        success = compareTensor(outputHost, expectName)
+        log_result(success, modelPath)
+
 def modelTestWithConfig(config):
     model = config['model_name']
     inputs = config['input_names']

pymnn/test/unit_test.py

Lines changed: 3 additions & 0 deletions
@@ -1206,6 +1206,9 @@ def test_histogram(self):
         x_ = x.read()
         self.assertEqualVars(mp.histogram(x, 7, (2, 4)), np.histogram(x_, 7, (2, 4)))
     def test_ndarray(self):
+        e = mp.array([])
+        e_ = np.array([])
+        self.assertEqualVar(e, e_)
         x = mp.array([[1,2],[3,4]])
         x_ = np.array([[1,2],[3,4]])
         self.assertEqual(x.all(), x_.all())
