Skip to content

Commit 1183c3d

Browse files
committed
Code cleanup
1 parent 30c1d12 commit 1183c3d

File tree

2 files changed

+84
-96
lines changed

src/cpp/src/rag/text_embedding_pipeline.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,8 @@ std::string get_post_type_string(const TextEmbeddingPipeline::Config& config) {
175175
post_type = "cls";
176176
} else if (config.pooling_type == TextEmbeddingPipeline::PoolingType::MEAN) {
177177
post_type = "mean";
178-
} if (config.pooling_type == TextEmbeddingPipeline::PoolingType::LAST_TOKEN) {
179-
const auto left_padding = config.padding_side.has_value() && config.padding_side.value() == "left";
180-
if (left_padding) {
181-
post_type = "last_token_left";
182-
} else {
183-
post_type = "last_token_right";
184-
}
178+
} else {
179+
post_type = "last_token";
185180
}
186181
return post_type;
187182
}
@@ -244,7 +239,6 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl {
244239

245240
bool is_padding_on_left = false;
246241
if (m_config.padding_side) {
247-
std::cout << "GenAI: padding_side: " << *m_config.padding_side << std::endl;
248242
m_tokenization_params.insert({padding_side.name(), *m_config.padding_side});
249243
if (m_config.padding_side.value() == "left") {
250244
is_padding_on_left = true;
@@ -259,7 +253,6 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl {
259253

260254
ov::CompiledModel compiled_model;
261255
if (device == "NPU" && model->is_dynamic()) {
262-
std::cout << "GenAI: Go to NPUW branch" << std::endl;
263256
const auto is_padding = m_config.pad_to_max_length.has_value() && m_config.pad_to_max_length.value();
264257

265258
OPENVINO_ASSERT(!is_padding || config.pooling_type == TextEmbeddingPipeline::PoolingType::MEAN,

src/cpp/src/utils.cpp

Lines changed: 82 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -591,11 +591,75 @@ void print_scheduler_config_info(const SchedulerConfig &scheduler_config) {
591591
std::cout << scheduler_config.to_string() << std::endl;
592592
}
593593

594+
void import_npu_model(ov::CompiledModel& compiled,
595+
KVDesc& kv_desc,
596+
const ov::AnyMap& config,
597+
const std::string& blob_path) {
598+
if (!std::filesystem::exists(blob_path)) {
599+
OPENVINO_THROW("Blob file is not found at: " + blob_path);
600+
}
601+
std::ifstream fin(blob_path, std::ios::in | std::ios::binary);
602+
if (!fin.is_open()) {
603+
OPENVINO_THROW("Blob file can't be opened: " + blob_path);
604+
}
605+
compiled = ov::genai::utils::singleton_core().import_model(fin, "NPU", config);
606+
kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
607+
kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
608+
}
609+
610+
void export_npu_model(ov::CompiledModel& compiled,
611+
std::string& blob_path) {
612+
if (blob_path.empty()) {
613+
blob_path = "openvino_model.blob";
614+
}
615+
// Check the path is full
616+
const int EXT_SIZE = 5; // ".blob"
617+
if (blob_path.size() < EXT_SIZE) {
618+
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
619+
}
620+
if (strncmp(".blob", &blob_path[blob_path.size() - EXT_SIZE], EXT_SIZE) != 0) {
621+
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
622+
}
623+
std::ofstream fout(blob_path, std::ios::out | std::ios::binary);
624+
if (!fout.is_open()) {
625+
OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
626+
}
627+
compiled.export_model(fout);
628+
}
629+
630+
void get_npu_model_config(ov::AnyMap& properties, const KVAxesPosition& kv_pos,
631+
KVDesc& kv_desc, const bool is_whisper) {
632+
if (is_whisper) {
633+
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(4u);
634+
// kvcache size for Whisper = 448u (MAX_PROMPT_LEN + MIN_RESPONSE_LEN)
635+
kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(444u);
636+
update_npu_config_whisper(properties, kv_pos, kv_desc);
637+
} else {
638+
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
639+
kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(128u);
640+
update_npu_config(properties, kv_pos, kv_desc);
641+
}
642+
}
643+
644+
void get_npu_text_embedding_config(ov::AnyMap& properties, const KVAxesPosition& kv_pos,
645+
KVDesc& kv_desc, const ov::AnyMap& text_embed_config) {
646+
auto max_len = get_option<uint32_t>(text_embed_config, "MAX_PROMPT_LEN");
647+
if (max_len.has_value()) {
648+
kv_desc.max_prompt_len = max_len.value();
649+
} else {
650+
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
651+
}
652+
kv_desc.min_response_len = kv_desc.max_prompt_len;
653+
update_npu_config_text_embedding(properties, text_embed_config, kv_pos, kv_desc);
654+
}
655+
594656
std::pair<ov::CompiledModel, KVDesc>
595-
compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
657+
compile_decoder_for_npu_impl(const std::shared_ptr<ov::Model>& model,
596658
const ov::AnyMap& config,
597659
const KVAxesPosition& kv_pos,
598-
const bool is_whisper) {
660+
const bool is_whisper,
661+
const bool is_text_embedding,
662+
const ov::AnyMap& text_embed_config = {}) {
599663
ov::CompiledModel compiled;
600664
ov::AnyMap properties = config;
601665
KVDesc kv_desc;
@@ -605,107 +669,38 @@ compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
605669
const bool do_import = (!blob_path.empty() && !export_blob);
606670

607671
if (do_import) {
608-
if (!std::filesystem::exists(blob_path)) {
609-
OPENVINO_THROW("Blob file is not found at: " + blob_path);
610-
}
611-
std::ifstream fin(blob_path, std::ios::in | std::ios::binary);
612-
if (!fin.is_open()) {
613-
OPENVINO_THROW("Blob file can't be opened: " + blob_path);
614-
}
615-
compiled = ov::genai::utils::singleton_core().import_model(fin, "NPU", config);
616-
kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
617-
kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
672+
import_npu_model(compiled, kv_desc, properties, blob_path);
618673
} else {
619-
if (is_whisper) {
620-
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(4u);
621-
// kvcache size for Whisper = 448u (MAX_PROMPT_LEN + MIN_RESPONSE_LEN)
622-
kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(444u);
623-
update_npu_config_whisper(properties, kv_pos, kv_desc);
674+
if (is_text_embedding) {
675+
get_npu_text_embedding_config(properties, kv_pos, kv_desc, text_embed_config);
624676
} else {
625-
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
626-
kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(128u);
627-
update_npu_config(properties, kv_pos, kv_desc);
677+
get_npu_model_config(properties, kv_pos, kv_desc, is_whisper);
628678
}
679+
629680
compiled = ov::genai::utils::singleton_core().compile_model(model, "NPU", properties);
630681
// Also export compiled model if required
631682
if (export_blob) {
632-
if (blob_path.empty()) {
633-
blob_path = "openvino_model.blob";
634-
}
635-
// Check the path is full
636-
const int EXT_SIZE = 5; // ".blob"
637-
if (blob_path.size() < EXT_SIZE) {
638-
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
639-
}
640-
if (strncmp(".blob", &blob_path[blob_path.size() - EXT_SIZE], EXT_SIZE) != 0) {
641-
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
642-
}
643-
std::ofstream fout(blob_path, std::ios::out | std::ios::binary);
644-
if (!fout.is_open()) {
645-
OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
646-
}
647-
compiled.export_model(fout);
683+
export_npu_model(compiled, blob_path);
648684
}
649685
}
686+
650687
return { compiled, kv_desc };
651688
}
652689

690+
std::pair<ov::CompiledModel, KVDesc>
691+
compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
692+
const ov::AnyMap& config,
693+
const KVAxesPosition& kv_pos,
694+
const bool is_whisper) {
695+
return compile_decoder_for_npu_impl(model, config, kv_pos, is_whisper, false);
696+
}
697+
653698
std::pair<ov::CompiledModel, KVDesc>
654699
compile_decoder_for_npu_text_embedding(const std::shared_ptr<ov::Model>& model,
655700
const ov::AnyMap& config,
656701
const KVAxesPosition& kv_pos,
657702
const ov::AnyMap& text_embed_config) {
658-
ov::CompiledModel compiled;
659-
ov::AnyMap properties = config;
660-
KVDesc kv_desc;
661-
662-
auto blob_path = pop_or_default(properties, "BLOB_PATH", std::string{});
663-
const auto export_blob = pop_or_default(properties, "EXPORT_BLOB", false);
664-
const bool do_import = (!blob_path.empty() && !export_blob);
665-
666-
if (do_import) {
667-
if (!std::filesystem::exists(blob_path)) {
668-
OPENVINO_THROW("Blob file is not found at: " + blob_path);
669-
}
670-
std::ifstream fin(blob_path, std::ios::in | std::ios::binary);
671-
if (!fin.is_open()) {
672-
OPENVINO_THROW("Blob file can't be opened: " + blob_path);
673-
}
674-
compiled = ov::genai::utils::singleton_core().import_model(fin, "NPU", config);
675-
kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
676-
kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
677-
} else {
678-
auto max_len = get_option<uint32_t>(text_embed_config, "MAX_PROMPT_LEN");
679-
if (max_len.has_value()) {
680-
kv_desc.max_prompt_len = max_len.value();
681-
} else {
682-
kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
683-
}
684-
kv_desc.min_response_len = kv_desc.max_prompt_len;
685-
update_npu_config_text_embedding(properties, text_embed_config, kv_pos, kv_desc);
686-
687-
compiled = ov::genai::utils::singleton_core().compile_model(model, "NPU", properties);
688-
// Also export compiled model if required
689-
if (export_blob) {
690-
if (blob_path.empty()) {
691-
blob_path = "openvino_model.blob";
692-
}
693-
// Check the path is full
694-
const int EXT_SIZE = 5; // ".blob"
695-
if (blob_path.size() < EXT_SIZE) {
696-
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
697-
}
698-
if (strncmp(".blob", &blob_path[blob_path.size() - EXT_SIZE], EXT_SIZE) != 0) {
699-
OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
700-
}
701-
std::ofstream fout(blob_path, std::ios::out | std::ios::binary);
702-
if (!fout.is_open()) {
703-
OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
704-
}
705-
compiled.export_model(fout);
706-
}
707-
}
708-
return { compiled, kv_desc };
703+
return compile_decoder_for_npu_impl(model, config, kv_pos, false, true, text_embed_config);
709704
}
710705

711706
std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {

0 commit comments

Comments (0)