Code cleanup

mengweiguo · mengweiguo · commit 1183c3d5ed1b · 2025-12-01T10:22:40.000+08:00
diff --git a/src/cpp/src/rag/text_embedding_pipeline.cpp b/src/cpp/src/rag/text_embedding_pipeline.cpp
@@ -175,13 +175,8 @@ std::string get_post_type_string(const TextEmbeddingPipeline::Config& config) {
         post_type = "cls";
     } else if (config.pooling_type == TextEmbeddingPipeline::PoolingType::MEAN) {
         post_type = "mean";
-    } if (config.pooling_type == TextEmbeddingPipeline::PoolingType::LAST_TOKEN) {
-        const auto left_padding = config.padding_side.has_value() && config.padding_side.value() == "left";
-        if (left_padding) {
-            post_type = "last_token_left";
-        } else {
-            post_type = "last_token_right";
-        }
+    } else {
+        post_type = "last_token";
     }
     return post_type;
 }
@@ -244,7 +239,6 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl {
 
         bool is_padding_on_left = false;
         if (m_config.padding_side) {
-            std::cout << "GenAI: padding_side: " << *m_config.padding_side << std::endl;
             m_tokenization_params.insert({padding_side.name(), *m_config.padding_side});
             if (m_config.padding_side.value() == "left") {
                 is_padding_on_left = true;
@@ -259,7 +253,6 @@ class TextEmbeddingPipeline::TextEmbeddingPipelineImpl {
 
         ov::CompiledModel compiled_model;
         if (device == "NPU" && model->is_dynamic()) {
-            std::cout << "GenAI: Go to NPUW branch" << std::endl;
             const auto is_padding = m_config.pad_to_max_length.has_value() && m_config.pad_to_max_length.value();
 
             OPENVINO_ASSERT(!is_padding || config.pooling_type == TextEmbeddingPipeline::PoolingType::MEAN,
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
@@ -591,11 +591,75 @@ void print_scheduler_config_info(const SchedulerConfig &scheduler_config) {
     std::cout << scheduler_config.to_string() << std::endl;
 }
 
+// Imports a compiled "NPU" model from the blob file at `blob_path` into `compiled` and copies its
+// NPUW_LLM_MAX_PROMPT_LEN / NPUW_LLM_MIN_RESPONSE_LEN properties into `kv_desc`; throws on a missing or unopenable file.
+void import_npu_model(ov::CompiledModel& compiled, KVDesc& kv_desc,
+                      const ov::AnyMap& config, const std::string& blob_path) {
+    if (!std::filesystem::exists(blob_path)) {
+        OPENVINO_THROW("Blob file is not found at: " + blob_path);
+    }
+    std::ifstream blob_stream(blob_path, std::ios::in | std::ios::binary);
+    if (!blob_stream.is_open()) {
+        OPENVINO_THROW("Blob file can't be opened: " + blob_path);
+    }
+    compiled = ov::genai::utils::singleton_core().import_model(blob_stream, "NPU", config);
+    kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
+    kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
+}
+
+// Writes `compiled` to the file at `blob_path`. An empty path is replaced (through the reference)
+// by "openvino_model.blob"; a path that does not end with ".blob" or cannot be opened throws.
+void export_npu_model(ov::CompiledModel& compiled, std::string& blob_path) {
+    if (blob_path.empty()) {
+        blob_path = "openvino_model.blob";
+    }
+    // Length is tested first so the suffix offset below cannot underflow.
+    const std::string blob_ext = ".blob";
+    const bool has_blob_ext = blob_path.size() >= blob_ext.size() &&
+        blob_path.compare(blob_path.size() - blob_ext.size(), blob_ext.size(), blob_ext) == 0;
+    if (!has_blob_ext) {
+        OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
+    }
+    std::ofstream blob_stream(blob_path, std::ios::out | std::ios::binary);
+    if (!blob_stream.is_open()) {
+        OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
+    }
+    compiled.export_model(blob_stream);
+}
+
+// Reads MAX_PROMPT_LEN and MIN_RESPONSE_LEN from `properties` via pop_int_and_cast into `kv_desc`
+// (defaults depend on `is_whisper`), then calls the Whisper or the generic NPU config update.
+void get_npu_model_config(ov::AnyMap& properties, const KVAxesPosition& kv_pos,
+                          KVDesc& kv_desc, const bool is_whisper) {
+    // Whisper defaults give a kvcache size of 448u (MAX_PROMPT_LEN + MIN_RESPONSE_LEN).
+    kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(is_whisper ? 4u : 1024u);
+    kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(is_whisper ? 444u : 128u);
+    if (is_whisper) {
+        update_npu_config_whisper(properties, kv_pos, kv_desc);
+    } else {
+        update_npu_config(properties, kv_pos, kv_desc);
+    }
+}
+
+// Sets the prompt length from `text_embed_config` when it carries MAX_PROMPT_LEN, otherwise from
+// `properties` (fallback 1024u); the response length is set equal to the prompt length.
+void get_npu_text_embedding_config(ov::AnyMap& properties, const KVAxesPosition& kv_pos,
+                                   KVDesc& kv_desc, const ov::AnyMap& text_embed_config) {
+    const auto embed_prompt_len = get_option<uint32_t>(text_embed_config, "MAX_PROMPT_LEN");
+    // `properties` is consulted only when the embedding config has no override.
+    kv_desc.max_prompt_len = embed_prompt_len.has_value() ? embed_prompt_len.value()
+        : pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
+    kv_desc.min_response_len = kv_desc.max_prompt_len;
+    update_npu_config_text_embedding(properties, text_embed_config, kv_pos, kv_desc);
+}
+
 std::pair<ov::CompiledModel, KVDesc>
-compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
+compile_decoder_for_npu_impl(const std::shared_ptr<ov::Model>& model,
                         const ov::AnyMap& config,
                         const KVAxesPosition& kv_pos,
-                        const bool is_whisper) {
+                        const bool is_whisper,
+                        const bool is_text_embedding,
+                        const ov::AnyMap& text_embed_config = {}) {
     ov::CompiledModel compiled;
     ov::AnyMap properties = config;
     KVDesc kv_desc;
@@ -605,107 +669,38 @@ compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
     const bool do_import = (!blob_path.empty() && !export_blob);
 
     if (do_import) {
-        if (!std::filesystem::exists(blob_path)) {
-            OPENVINO_THROW("Blob file is not found at: " + blob_path);
-        }
-        std::ifstream fin(blob_path, std::ios::in | std::ios::binary);
-        if (!fin.is_open()) {
-            OPENVINO_THROW("Blob file can't be opened: " + blob_path);
-        }
-        compiled = ov::genai::utils::singleton_core().import_model(fin, "NPU", config);
-        kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
-        kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
+        import_npu_model(compiled, kv_desc, properties, blob_path);
     } else {
-        if (is_whisper) {
-            kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(4u);
-            // kvcache size for Whisper = 448u (MAX_PROMPT_LEN + MIN_RESPONSE_LEN)
-            kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(444u);
-            update_npu_config_whisper(properties, kv_pos, kv_desc);
+        if (is_text_embedding) {
+            get_npu_text_embedding_config(properties, kv_pos, kv_desc, text_embed_config);
         } else {
-            kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
-            kv_desc.min_response_len = pop_int_and_cast(properties, "MIN_RESPONSE_LEN").value_or(128u);
-            update_npu_config(properties, kv_pos, kv_desc);
+            get_npu_model_config(properties, kv_pos, kv_desc, is_whisper);
         }
+
         compiled = ov::genai::utils::singleton_core().compile_model(model, "NPU", properties);
         // Also export compiled model if required
         if (export_blob) {
-            if (blob_path.empty()) {
-                blob_path = "openvino_model.blob";
-            }
-            // Check the path is full
-            const int EXT_SIZE = 5; // ".blob"
-            if (blob_path.size() < EXT_SIZE) {
-                OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
-            }
-            if (strncmp(".blob", &blob_path[blob_path.size() - EXT_SIZE], EXT_SIZE) != 0) {
-                OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
-            }
-            std::ofstream fout(blob_path, std::ios::out | std::ios::binary);
-            if (!fout.is_open()) {
-                OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
-            }
-            compiled.export_model(fout);
+            export_npu_model(compiled, blob_path);
         }
     }
+
     return { compiled, kv_desc };
 }
 
+// Decoder entry point: forwards to compile_decoder_for_npu_impl with the text-embedding path switched off.
+std::pair<ov::CompiledModel, KVDesc>
+compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model, const ov::AnyMap& config,
+                        const KVAxesPosition& kv_pos, const bool is_whisper) {
+    constexpr bool is_text_embedding = false;
+    return compile_decoder_for_npu_impl(model, config, kv_pos, is_whisper, is_text_embedding);
+}
+
 std::pair<ov::CompiledModel, KVDesc>
 compile_decoder_for_npu_text_embedding(const std::shared_ptr<ov::Model>& model,
                         const ov::AnyMap& config,
                         const KVAxesPosition& kv_pos,
                         const ov::AnyMap& text_embed_config) {
-    ov::CompiledModel compiled;
-    ov::AnyMap properties = config;
-    KVDesc kv_desc;
-
-    auto blob_path = pop_or_default(properties, "BLOB_PATH", std::string{});
-    const auto export_blob = pop_or_default(properties, "EXPORT_BLOB", false);
-    const bool do_import = (!blob_path.empty() && !export_blob);
-
-    if (do_import) {
-        if (!std::filesystem::exists(blob_path)) {
-            OPENVINO_THROW("Blob file is not found at: " + blob_path);
-        }
-        std::ifstream fin(blob_path, std::ios::in | std::ios::binary);
-        if (!fin.is_open()) {
-            OPENVINO_THROW("Blob file can't be opened: " + blob_path);
-        }
-        compiled = ov::genai::utils::singleton_core().import_model(fin, "NPU", config);
-        kv_desc.max_prompt_len = compiled.get_property("NPUW_LLM_MAX_PROMPT_LEN").as<uint32_t>();
-        kv_desc.min_response_len = compiled.get_property("NPUW_LLM_MIN_RESPONSE_LEN").as<uint32_t>();
-    } else {
-            auto max_len = get_option<uint32_t>(text_embed_config, "MAX_PROMPT_LEN");
-            if (max_len.has_value()) {
-                kv_desc.max_prompt_len = max_len.value();
-            } else {
-                kv_desc.max_prompt_len = pop_int_and_cast(properties, "MAX_PROMPT_LEN").value_or(1024u);
-            }
-            kv_desc.min_response_len = kv_desc.max_prompt_len;
-            update_npu_config_text_embedding(properties, text_embed_config, kv_pos, kv_desc);
-
-        compiled = ov::genai::utils::singleton_core().compile_model(model, "NPU", properties);
-        // Also export compiled model if required
-        if (export_blob) {
-            if (blob_path.empty()) {
-                blob_path = "openvino_model.blob";
-            }
-            // Check the path is full
-            const int EXT_SIZE = 5; // ".blob"
-            if (blob_path.size() < EXT_SIZE) {
-                OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
-            }
-            if (strncmp(".blob", &blob_path[blob_path.size() - EXT_SIZE], EXT_SIZE) != 0) {
-                OPENVINO_THROW("Please provide a full path to blob file in BLOB_PATH: " + blob_path);
-            }
-            std::ofstream fout(blob_path, std::ios::out | std::ios::binary);
-            if (!fout.is_open()) {
-                OPENVINO_THROW("Blob file can't be exported to: " + blob_path);
-            }
-            compiled.export_model(fout);
-        }
-    }
-    return { compiled, kv_desc };
+    return compile_decoder_for_npu_impl(model, config, kv_pos, false, true, text_embed_config);
 }
 
 std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_name) {