diff --git a/site/docs/bindings/node-js.md b/site/docs/bindings/node-js.md
index 7abaef3504..ece443fbaf 100644
--- a/site/docs/bindings/node-js.md
+++ b/site/docs/bindings/node-js.md
@@ -9,7 +9,7 @@ description: Node.js bindings provide JavaScript/TypeScript API.
OpenVINO GenAI provides Node.js bindings that enable you to use generative AI pipelines in JavaScript and TypeScript applications.
:::warning API Coverage
-Node.js bindings currently provide a subset of the full OpenVINO GenAI API available in C++ and Python. The focus is on core text generation (`LLMPipeline`) and text embedding (`TextEmbeddingPipeline`) functionality.
+Node.js bindings currently provide a subset of the full OpenVINO GenAI API available in C++ and Python. The focus is on core text generation (`LLMPipeline`), vision language model inference (`VLMPipeline`), and text embedding (`TextEmbeddingPipeline`) functionality.
:::
## Supported Pipelines and Features
@@ -23,6 +23,10 @@ Node.js bindings currently support:
- Multiple sampling strategies (greedy, beam search)
- Structured output
- ReAct agent support
+- `VLMPipeline`: Vision Language Model inference for multimodal tasks
+ - Process images and videos with text prompts
+ - Chat mode with conversation history
+ - Streaming support
- `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications
- `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting
- Encode strings into token id and attention mask tensors
diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx
new file mode 100644
index 0000000000..f2a495c1e7
--- /dev/null
+++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx
@@ -0,0 +1,40 @@
+import CodeBlock from '@theme/CodeBlock';
+
+<CodeBlock language="js">
+{`import { addon as ov } from "openvino-node";
+import { VLMPipeline } from "openvino-genai-node";
+import { stat, readdir } from "node:fs/promises";
+import sharp from "sharp";
+import path from "node:path";
+
+async function readImage(imagePath) {
+ const img = sharp(imagePath);
+ const metadata = await img.metadata();
+ const { width, height, channels } = metadata;
+ const imageBuffer = await img.raw().toBuffer();
+ return new ov.Tensor(ov.element.u8, [height, width, channels], imageBuffer);
+}
+
+async function readImages(imagePath) {
+ const stats = await stat(imagePath);
+ if (stats.isDirectory()) {
+ const files = await readdir(imagePath);
+ return Promise.all(files.sort().map((file) => readImage(path.join(imagePath, file))));
+ }
+ return [await readImage(imagePath)];
+}
+
+const images = await readImages("./images");
+
+const pipe = await VLMPipeline(modelPath, "${props.device || 'CPU'}");
+
+const result = await pipe.generate(prompt, {
+ images,
+ generationConfig: { max_new_tokens: 100 },
+});
+console.log(result.texts[0]);
+
+// To pass video frames, use the 'videos' option; each frames tensor has layout [Frames, H, W, C]
+// const result = await pipe.generate(prompt, { videos: [frames], generationConfig: { max_new_tokens: 100 } });
+`}
+</CodeBlock>
diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx
index b5082eb1ef..284193977c 100644
--- a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx
+++ b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx
@@ -1,5 +1,6 @@
import CodeExampleCPP from './_code_example_cpp.mdx';
import CodeExamplePython from './_code_example_python.mdx';
+import CodeExampleJS from './_code_example_js.mdx';
## Run Model Using OpenVINO GenAI
@@ -27,6 +28,16 @@ It can generate text from a text prompt and images as inputs.
+
+
+
+
+
+
+
+
+
+
:::tip
diff --git a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx
index 6ef41e98f1..b953df27bf 100644
--- a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx
+++ b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx
@@ -81,6 +81,28 @@ Similar to [text generation](/docs/use-cases/text-generation/#use-different-gene
}
```
+    <TabItemJS>
+ ```javascript
+ import { VLMPipeline } from 'openvino-genai-node';
+
+ const pipe = await VLMPipeline(modelPath, "CPU", {});
+
+ // Create custom generation configuration
+ const config = {
+ max_new_tokens: 100,
+ temperature: 0.7,
+ top_k: 50,
+ top_p: 0.9,
+ repetition_penalty: 1.2
+ };
+
+ // Generate text with custom configuration
+ const output = await pipe.generate(prompt, {
+ images: images,
+ generationConfig: config
+ });
+ ```
+    </TabItemJS>
diff --git a/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx b/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx
index 50786c9ff3..010150c5a2 100644
--- a/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx
+++ b/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx
@@ -1,10 +1,11 @@
import Button from '@site/src/components/Button';
-import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs';
+import { LanguageTabs, TabItemCpp, TabItemPython, TabItemJS } from '@site/src/components/LanguageTabs';
import UseCaseCard from './UseCaseCard';
import CodeExampleCpp from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx';
import CodeExamplePython from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx';
+import CodeExampleJS from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx';
export const ImageProcessing = () => (
@@ -27,6 +28,9 @@ export const ImageProcessing = () => (
+      <TabItemJS>
+        <CodeExampleJS />
+      </TabItemJS>
diff --git a/src/js/include/addon.hpp b/src/js/include/addon.hpp
index 28371ba822..c9b89cc610 100644
--- a/src/js/include/addon.hpp
+++ b/src/js/include/addon.hpp
@@ -9,8 +9,10 @@ typedef Napi::Function (*Prototype)(Napi::Env);
struct AddonData {
Napi::FunctionReference core;
+ Napi::FunctionReference vlm_pipeline;
Napi::FunctionReference tokenizer;
Napi::FunctionReference perf_metrics;
+ Napi::FunctionReference vlm_perf_metrics;
Napi::FunctionReference chat_history;
Napi::ObjectReference openvino_addon;
};
diff --git a/src/js/include/base/perf_metrics.hpp b/src/js/include/base/perf_metrics.hpp
new file mode 100644
index 0000000000..8d85266e14
--- /dev/null
+++ b/src/js/include/base/perf_metrics.hpp
@@ -0,0 +1,261 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include "bindings_utils.hpp"
+#include "include/helper.hpp"
+#include "openvino/genai/perf_metrics.hpp"
+
+using ov::genai::common_bindings::utils::get_ms;
+using ov::genai::common_bindings::utils::timestamp_to_ms;
+
+namespace perf_utils {
+
+inline Napi::Object create_mean_std_pair(Napi::Env env, const ov::genai::MeanStdPair& pair) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set("mean", Napi::Number::New(env, pair.mean));
+ obj.Set("std", Napi::Number::New(env, pair.std));
+ return obj;
+}
+
+inline Napi::Object create_summary_stats(Napi::Env env, const ov::genai::SummaryStats& stats) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set("mean", Napi::Number::New(env, stats.mean));
+ obj.Set("std", Napi::Number::New(env, stats.std));
+ obj.Set("min", Napi::Number::New(env, stats.min));
+ obj.Set("max", Napi::Number::New(env, stats.max));
+ return obj;
+}
+
+} // namespace perf_utils
+
+/**
+ * @brief Base template class for PerfMetrics wrappers.
+ *
+ * This class provides common functionality for wrapping ov::genai::PerfMetrics
+ * and derived classes (like VLMPerfMetrics) in Node.js addon.
+ *
+ * @tparam T The derived wrapper class (CRTP pattern).
+ * @tparam MetricsType The type of metrics to store (default: ov::genai::PerfMetrics).
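+ *
+ * Usage sketch (mirrors how VLMPerfMetricsWrapper in this change derives from the base;
+ * illustrative only, the actual member list lives in the derived headers):
+ *
+ *   class VLMPerfMetricsWrapper
+ *       : public BasePerfMetricsWrapper<VLMPerfMetricsWrapper, ov::genai::VLMPerfMetrics> {
+ *       // VLM-specific getters such as get_prepare_embeddings_duration() go here.
+ *   };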
+ */
+template <class T, class MetricsType = ov::genai::PerfMetrics>
+class BasePerfMetricsWrapper : public Napi::ObjectWrap<T> {
+public:
+ using PropertyDescriptor = typename Napi::ObjectWrap<T>::PropertyDescriptor;
+
+ BasePerfMetricsWrapper(const Napi::CallbackInfo& info);
+ virtual ~BasePerfMetricsWrapper() {}
+
+ /**
+ * @brief Returns a vector of base class property descriptors.
+ *
+ * Derived classes can use this to get all base methods and add their own.
+ */
+ static std::vector<PropertyDescriptor> get_class_properties();
+
+ Napi::Value get_load_time(const Napi::CallbackInfo& info);
+ Napi::Value get_num_generated_tokens(const Napi::CallbackInfo& info);
+ Napi::Value get_num_input_tokens(const Napi::CallbackInfo& info);
+ Napi::Value get_ttft(const Napi::CallbackInfo& info);
+ Napi::Value get_tpot(const Napi::CallbackInfo& info);
+ Napi::Value get_ipot(const Napi::CallbackInfo& info);
+ Napi::Value get_throughput(const Napi::CallbackInfo& info);
+
+ Napi::Value get_inference_duration(const Napi::CallbackInfo& info);
+ Napi::Value get_generate_duration(const Napi::CallbackInfo& info);
+ Napi::Value get_tokenization_duration(const Napi::CallbackInfo& info);
+ Napi::Value get_detokenization_duration(const Napi::CallbackInfo& info);
+
+ Napi::Value get_grammar_compiler_init_times(const Napi::CallbackInfo& info);
+ Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info);
+
+ /**
+ * @brief Base implementation of get_raw_metrics.
+ *
+ * Derived classes MUST override this method to use it with InstanceAccessor.
+ * Example:
+ *
+ * Napi::Value get_raw_metrics(const Napi::CallbackInfo& info) {
+ * return BasePerfMetricsWrapper::get_raw_metrics(info);
+ * }
+ */
+ Napi::Value get_raw_metrics(const Napi::CallbackInfo& info);
+ Napi::Value add(const Napi::CallbackInfo& info);
+ MetricsType& get_value();
+
+protected:
+ MetricsType _metrics;
+};
+
+// Template implementations
+
+template <class T, class MetricsType>
+BasePerfMetricsWrapper<T, MetricsType>::BasePerfMetricsWrapper(const Napi::CallbackInfo& info)
+ : Napi::ObjectWrap<T>(info),
+ _metrics{} {}
+
+template <class T, class MetricsType>
+std::vector<typename Napi::ObjectWrap<T>::PropertyDescriptor>
+BasePerfMetricsWrapper<T, MetricsType>::get_class_properties() {
+ return {
+ T::InstanceMethod("getLoadTime", &T::get_load_time),
+ T::InstanceMethod("getNumGeneratedTokens", &T::get_num_generated_tokens),
+ T::InstanceMethod("getNumInputTokens", &T::get_num_input_tokens),
+ T::InstanceMethod("getTTFT", &T::get_ttft),
+ T::InstanceMethod("getTPOT", &T::get_tpot),
+ T::InstanceMethod("getIPOT", &T::get_ipot),
+ T::InstanceMethod("getThroughput", &T::get_throughput),
+ T::InstanceMethod("getInferenceDuration", &T::get_inference_duration),
+ T::InstanceMethod("getGenerateDuration", &T::get_generate_duration),
+ T::InstanceMethod("getTokenizationDuration", &T::get_tokenization_duration),
+ T::InstanceMethod("getDetokenizationDuration", &T::get_detokenization_duration),
+ T::InstanceMethod("getGrammarCompilerInitTimes", &T::get_grammar_compiler_init_times),
+ T::InstanceMethod("getGrammarCompileTime", &T::get_grammar_compile_time),
+ T::template InstanceAccessor<&T::get_raw_metrics>("rawMetrics"),
+ T::InstanceMethod("add", &T::add),
+ };
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_load_time(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getLoadTime()");
+ return Napi::Number::New(info.Env(), _metrics.get_load_time());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_num_generated_tokens(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getNumGeneratedTokens()");
+ return Napi::Number::New(info.Env(), _metrics.get_num_generated_tokens());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_num_input_tokens(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getNumInputTokens()");
+ return Napi::Number::New(info.Env(), _metrics.get_num_input_tokens());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_ttft(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getTTFT()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_ttft());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_tpot(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getTPOT()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_tpot());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_ipot(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getIPOT()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_ipot());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_throughput(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getThroughput()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_throughput());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_inference_duration(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getInferenceDuration()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_inference_duration());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_generate_duration(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getGenerateDuration()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_generate_duration());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_tokenization_duration(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getTokenizationDuration()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_tokenization_duration());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_detokenization_duration(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getDetokenizationDuration()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_detokenization_duration());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_grammar_compiler_init_times(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompilerInitTimes()");
+ return cpp_map_to_js_object(info.Env(), _metrics.get_grammar_compiler_init_times());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_grammar_compile_time(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompileTime()");
+ return perf_utils::create_summary_stats(info.Env(), _metrics.get_grammar_compile_time());
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::get_raw_metrics(const Napi::CallbackInfo& info) {
+ Napi::Object obj = Napi::Object::New(info.Env());
+ obj.Set("generateDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::generate_durations)));
+ obj.Set("tokenizationDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::tokenization_durations)));
+ obj.Set("detokenizationDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::detokenization_durations)));
+
+ obj.Set("timesToFirstToken",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_times_to_first_token)));
+ obj.Set("newTokenTimes",
+ cpp_to_js<std::vector<double>, Napi::Value>(
+ info.Env(),
+ timestamp_to_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_new_token_times)));
+ obj.Set("tokenInferDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_token_infer_durations)));
+ obj.Set("batchSizes", cpp_to_js<std::vector<size_t>, Napi::Value>(info.Env(), _metrics.raw_metrics.m_batch_sizes));
+ obj.Set("durations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_durations)));
+ obj.Set("inferenceDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_inference_durations)));
+
+ obj.Set("grammarCompileTimes",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_grammar_compile_times)));
+
+ return obj;
+}
+
+template <class T, class MetricsType>
+Napi::Value BasePerfMetricsWrapper<T, MetricsType>::add(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 1, "add()");
+ const auto env = info.Env();
+ try {
+ _metrics += unwrap<MetricsType>(env, info[0]);
+ } catch (const std::exception& ex) {
+ Napi::TypeError::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return info.This();
+}
+
+template <class T, class MetricsType>
+MetricsType& BasePerfMetricsWrapper<T, MetricsType>::get_value() {
+ return _metrics;
+}
diff --git a/src/js/include/helper.hpp b/src/js/include/helper.hpp
index 55370d91e3..cccfcc5281 100644
--- a/src/js/include/helper.hpp
+++ b/src/js/include/helper.hpp
@@ -1,9 +1,10 @@
#pragma once
#include <napi.h>
+#include "openvino/core/type/element_type.hpp"
#include "openvino/genai/llm_pipeline.hpp"
#include "openvino/genai/rag/text_embedding_pipeline.hpp"
-#include "openvino/core/type/element_type.hpp"
+#include "openvino/genai/visual_language/pipeline.hpp"
#include "openvino/openvino.hpp"
template <typename... Ts> struct overloaded : Ts... {using Ts::operator()...;};
@@ -64,6 +65,8 @@ template <>
ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp<ov::genai::StructuredOutputConfig::StructuralTag>(const Napi::Env& env, const Napi::Value& value);
template <>
ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value);
+template <>
+std::vector<ov::Tensor> js_to_cpp<std::vector<ov::Tensor>>(const Napi::Env& env, const Napi::Value& value);
/**
* @brief Unwraps a C++ object from a JavaScript wrapper.
* @tparam TargetType The C++ class type to extract.
@@ -75,6 +78,9 @@ TargetType& unwrap(const Napi::Env& env, const Napi::Value& value);
template <>
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value);
+template <>
+ov::genai::VLMPerfMetrics& unwrap<ov::genai::VLMPerfMetrics>(const Napi::Env& env, const Napi::Value& value);
+
/**
* @brief Template function to convert C++ data types into Javascript data types
* @tparam TargetType Destinated Javascript data type.
@@ -144,3 +150,7 @@ std::string json_stringify(const Napi::Env& env, const Napi::Value& value);
Napi::Value json_parse(const Napi::Env& env, const std::string& value);
Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name);
+
+Napi::Object to_decoded_result(const Napi::Env& env, const ov::genai::DecodedResults& results);
+
+Napi::Object to_vlm_decoded_result(const Napi::Env& env, const ov::genai::VLMDecodedResults& results);
diff --git a/src/js/include/perf_metrics.hpp b/src/js/include/perf_metrics.hpp
index dd2aa7f587..fc1ddfbb7b 100644
--- a/src/js/include/perf_metrics.hpp
+++ b/src/js/include/perf_metrics.hpp
@@ -1,36 +1,19 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
#pragma once
#include <napi.h>
+#include "include/base/perf_metrics.hpp"
#include "openvino/genai/perf_metrics.hpp"
-class PerfMetricsWrapper : public Napi::ObjectWrap<PerfMetricsWrapper> {
+class PerfMetricsWrapper : public BasePerfMetricsWrapper<PerfMetricsWrapper> {
public:
PerfMetricsWrapper(const Napi::CallbackInfo& info);
static Napi::Function get_class(Napi::Env env);
static Napi::Object wrap(Napi::Env env, const ov::genai::PerfMetrics& metrics);
- Napi::Value get_load_time(const Napi::CallbackInfo& info);
- Napi::Value get_num_generated_tokens(const Napi::CallbackInfo& info);
- Napi::Value get_num_input_tokens(const Napi::CallbackInfo& info);
- Napi::Value get_ttft(const Napi::CallbackInfo& info);
- Napi::Value get_tpot(const Napi::CallbackInfo& info);
- Napi::Value get_ipot(const Napi::CallbackInfo& info);
- Napi::Value get_throughput(const Napi::CallbackInfo& info);
-
- Napi::Value get_inference_duration(const Napi::CallbackInfo& info);
- Napi::Value get_generate_duration(const Napi::CallbackInfo& info);
- Napi::Value get_tokenization_duration(const Napi::CallbackInfo& info);
- Napi::Value get_detokenization_duration(const Napi::CallbackInfo& info);
-
- Napi::Value get_grammar_compiler_init_times(const Napi::CallbackInfo& info);
- Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info);
-
Napi::Value get_raw_metrics(const Napi::CallbackInfo& info);
- Napi::Value add(const Napi::CallbackInfo& info);
- ov::genai::PerfMetrics& get_value();
-
-private:
- ov::genai::PerfMetrics _metrics;
};
diff --git a/src/js/include/vlm_pipeline/finish_chat_worker.hpp b/src/js/include/vlm_pipeline/finish_chat_worker.hpp
new file mode 100644
index 0000000000..1c8c621e91
--- /dev/null
+++ b/src/js/include/vlm_pipeline/finish_chat_worker.hpp
@@ -0,0 +1,22 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include "openvino/genai/visual_language/pipeline.hpp"
+
+using namespace Napi;
+
+class VLMFinishChatWorker : public AsyncWorker {
+public:
+ VLMFinishChatWorker(Function& callback, std::shared_ptr<ov::genai::VLMPipeline>& pipe);
+ virtual ~VLMFinishChatWorker() {}
+
+ void Execute() override;
+ void OnOK() override;
+
+private:
+ std::shared_ptr<ov::genai::VLMPipeline>& pipe;
+};
diff --git a/src/js/include/vlm_pipeline/init_worker.hpp b/src/js/include/vlm_pipeline/init_worker.hpp
new file mode 100644
index 0000000000..17ca8a794e
--- /dev/null
+++ b/src/js/include/vlm_pipeline/init_worker.hpp
@@ -0,0 +1,32 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include "openvino/genai/visual_language/pipeline.hpp"
+
+using namespace Napi;
+
+class VLMInitWorker : public AsyncWorker {
+public:
+ VLMInitWorker(Function& callback,
+ std::shared_ptr<ov::genai::VLMPipeline>& pipe,
+ std::shared_ptr<bool> is_initializing,
+ const std::string model_path,
+ std::string device,
+ ov::AnyMap properties);
+ virtual ~VLMInitWorker() {}
+
+ void Execute() override;
+ void OnOK() override;
+ void OnError(const Error& e) override;
+
+private:
+ std::shared_ptr<ov::genai::VLMPipeline>& pipe;
+ std::shared_ptr<bool> is_initializing;
+ std::string model_path;
+ std::string device;
+ ov::AnyMap properties;
+};
diff --git a/src/js/include/vlm_pipeline/perf_metrics.hpp b/src/js/include/vlm_pipeline/perf_metrics.hpp
new file mode 100644
index 0000000000..4333b159c1
--- /dev/null
+++ b/src/js/include/vlm_pipeline/perf_metrics.hpp
@@ -0,0 +1,21 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include "include/base/perf_metrics.hpp"
+#include "openvino/genai/visual_language/perf_metrics.hpp"
+
+class VLMPerfMetricsWrapper : public BasePerfMetricsWrapper<VLMPerfMetricsWrapper, ov::genai::VLMPerfMetrics> {
+public:
+ VLMPerfMetricsWrapper(const Napi::CallbackInfo& info);
+
+ static Napi::Function get_class(Napi::Env env);
+ static Napi::Object wrap(Napi::Env env, const ov::genai::VLMPerfMetrics& metrics);
+
+ Napi::Value get_prepare_embeddings_duration(const Napi::CallbackInfo& info);
+ Napi::Value get_raw_metrics(const Napi::CallbackInfo& info);
+ Napi::Value get_vlm_raw_metrics(const Napi::CallbackInfo& info);
+};
diff --git a/src/js/include/vlm_pipeline/start_chat_worker.hpp b/src/js/include/vlm_pipeline/start_chat_worker.hpp
new file mode 100644
index 0000000000..cb7ce0ae8d
--- /dev/null
+++ b/src/js/include/vlm_pipeline/start_chat_worker.hpp
@@ -0,0 +1,23 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include "openvino/genai/visual_language/pipeline.hpp"
+
+using namespace Napi;
+
+class VLMStartChatWorker : public AsyncWorker {
+public:
+ VLMStartChatWorker(Function& callback, std::shared_ptr<ov::genai::VLMPipeline>& pipe, std::string system_message);
+ virtual ~VLMStartChatWorker() {}
+
+ void Execute() override;
+ void OnOK() override;
+
+private:
+ std::shared_ptr<ov::genai::VLMPipeline>& pipe;
+ std::string system_message;
+};
diff --git a/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp b/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp
new file mode 100644
index 0000000000..b7a34ab193
--- /dev/null
+++ b/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp
@@ -0,0 +1,30 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <napi.h>
+
+#include <memory>
+
+#include "openvino/genai/visual_language/pipeline.hpp"
+
+class VLMPipelineWrapper : public Napi::ObjectWrap<VLMPipelineWrapper> {
+public:
+ VLMPipelineWrapper(const Napi::CallbackInfo& info);
+
+ static Napi::Function get_class(Napi::Env env);
+
+ Napi::Value init(const Napi::CallbackInfo& info);
+ Napi::Value generate(const Napi::CallbackInfo& info);
+ Napi::Value start_chat(const Napi::CallbackInfo& info);
+ Napi::Value finish_chat(const Napi::CallbackInfo& info);
+ Napi::Value get_tokenizer(const Napi::CallbackInfo& info);
+ Napi::Value set_chat_template(const Napi::CallbackInfo& info);
+ Napi::Value set_generation_config(const Napi::CallbackInfo& info);
+
+private:
+ std::shared_ptr<ov::genai::VLMPipeline> pipe = nullptr;
+ std::shared_ptr<bool> is_initializing = std::make_shared<bool>(false);
+ std::shared_ptr<bool> is_generating = std::make_shared<bool>(false);
+};
diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts
index b6023e5a09..188af9324f 100644
--- a/src/js/lib/addon.ts
+++ b/src/js/lib/addon.ts
@@ -1,9 +1,12 @@
import { createRequire } from "module";
import { platform } from "node:os";
import { join, dirname, resolve } from "node:path";
+import { Tensor } from "openvino-node";
import type { ChatHistory as IChatHistory } from "./chatHistory.js";
import type { Tokenizer as ITokenizer } from "./tokenizer.js";
import { addon as ovAddon } from "openvino-node";
+import { GenerationConfig, StreamingStatus, VLMPipelineProperties } from "./utils.js";
+import { VLMPerfMetrics } from "./perfMetrics.js";
export type EmbeddingResult = Float32Array | Int8Array | Uint8Array;
export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[];
@@ -58,9 +61,36 @@ export interface TextEmbeddingPipelineWrapper {
embedDocumentsSync(documents: string[]): EmbeddingResults;
}
+export interface VLMPipeline {
+ new (): VLMPipeline;
+ init(
+ modelPath: string,
+ device: string,
+ ovProperties: VLMPipelineProperties,
+ callback: (err: Error | null) => void,
+ ): void;
+ generate(
+ prompt: string,
+ images: Tensor[] | undefined,
+ videos: Tensor[] | undefined,
+ streamer: ((chunk: string) => StreamingStatus) | undefined,
+ generationConfig: GenerationConfig | undefined,
+ callback: (
+ err: Error | null,
+ result: { texts: string[]; scores: number[]; perfMetrics: VLMPerfMetrics },
+ ) => void,
+ ): void;
+ startChat(systemMessage: string, callback: (err: Error | null) => void): void;
+ finishChat(callback: (err: Error | null) => void): void;
+ getTokenizer(): ITokenizer;
+ setChatTemplate(template: string): void;
+ setGenerationConfig(config: GenerationConfig): void;
+}
+
interface OpenVINOGenAIAddon {
TextEmbeddingPipeline: TextEmbeddingPipelineWrapper;
LLMPipeline: any;
+ VLMPipeline: VLMPipeline;
ChatHistory: IChatHistory;
Tokenizer: ITokenizer;
setOpenvinoAddon: (ovAddon: any) => void;
@@ -84,6 +114,6 @@ function getGenAIAddon(): OpenVINOGenAIAddon {
const addon = getGenAIAddon();
addon.setOpenvinoAddon(ovAddon);
-export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon;
+export const { TextEmbeddingPipeline, LLMPipeline, VLMPipeline, ChatHistory, Tokenizer } = addon;
export type ChatHistory = IChatHistory;
export type Tokenizer = ITokenizer;
diff --git a/src/js/lib/decodedResults.ts b/src/js/lib/decodedResults.ts
new file mode 100644
index 0000000000..f74db73508
--- /dev/null
+++ b/src/js/lib/decodedResults.ts
@@ -0,0 +1,56 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+import { PerfMetrics, VLMPerfMetrics } from "./perfMetrics.js";
+
+/**
+ * Structure to store resulting batched text outputs and scores for each batch.
+ * @note The first num_return_sequences elements correspond to the first batch element.
+ */
+export class DecodedResults {
+ /**
+ * @param {string[]} texts - Vector of resulting sequences.
+ * @param {number[]} scores - Scores for each sequence.
+ * @param {PerfMetrics} perfMetrics - Performance metrics (tpot, ttft, etc.).
+ */
+ constructor(texts: string[], scores: number[], perfMetrics: PerfMetrics) {
+ this.texts = texts;
+ this.scores = scores;
+ this.perfMetrics = perfMetrics;
+ }
+ toString() {
+ if (this.scores.length !== this.texts.length) {
+ throw new Error("The number of scores and texts doesn't match in DecodedResults.");
+ }
+ if (this.texts.length === 0) {
+ return "";
+ }
+ if (this.texts.length === 1) {
+ return this.texts[0];
+ }
+ const lines = this.scores.map((score, i) => `${score.toFixed(6)}: ${this.texts[i]}`);
+ return lines.join("\n");
+ }
+ texts: string[];
+ scores: number[];
+ perfMetrics: PerfMetrics;
+}
+
+/**
+ * Structure to store VLM resulting batched text outputs and scores for each batch.
+ * @note The first num_return_sequences elements correspond to the first batch element.
+ */
+export class VLMDecodedResults extends DecodedResults {
+ /**
+ * @param {string[]} texts - Vector of resulting sequences.
+ * @param {number[]} scores - Scores for each sequence.
+ * @param {VLMPerfMetrics} perfMetrics - VLM-specific performance metrics.
+ */
+ constructor(texts: string[], scores: number[], perfMetrics: VLMPerfMetrics) {
+ super(texts, scores, perfMetrics);
+ this.perfMetrics = perfMetrics;
+ }
+
+ /** VLM specific performance metrics. */
+ perfMetrics: VLMPerfMetrics;
+}
diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts
index ad8e49168f..dd36cf2227 100644
--- a/src/js/lib/index.ts
+++ b/src/js/lib/index.ts
@@ -2,8 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
import { LLMPipeline as LLM } from "./pipelines/llmPipeline.js";
+import { VLMPipeline as VLM } from "./pipelines/vlmPipeline.js";
import { TextEmbeddingPipeline as Embedding } from "./pipelines/textEmbeddingPipeline.js";
-import { LLMPipelineProperties } from "./utils.js";
+import { LLMPipelineProperties, VLMPipelineProperties } from "./utils.js";
class PipelineFactory {
static async LLMPipeline(modelPath: string, device?: string): Promise<LLM>;
@@ -28,6 +29,18 @@ class PipelineFactory {
await pipeline.init();
return pipeline;
}
+
+ static async VLMPipeline(
+ modelPath: string,
+ device: string = "CPU",
+ properties: VLMPipelineProperties = {},
+ ) {
+ const pipeline = new VLM(modelPath, device, properties);
+ await pipeline.init();
+
+ return pipeline;
+ }
+
static async TextEmbeddingPipeline(modelPath: string, device = "CPU", config = {}) {
const pipeline = new Embedding(modelPath, device, config);
await pipeline.init();
@@ -36,8 +49,9 @@ class PipelineFactory {
}
}
-export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
-export { DecodedResults } from "./pipelines/llmPipeline.js";
+export const { LLMPipeline, VLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
+export { DecodedResults, VLMDecodedResults } from "./decodedResults.js";
+export { PerfMetrics, VLMPerfMetrics } from "./perfMetrics.js";
export * from "./utils.js";
export * from "./addon.js";
export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js";
diff --git a/src/js/lib/perfMetrics.ts b/src/js/lib/perfMetrics.ts
new file mode 100644
index 0000000000..d312505024
--- /dev/null
+++ b/src/js/lib/perfMetrics.ts
@@ -0,0 +1,118 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+/** Structure holding mean and standard deviation values. */
+export type MeanStdPair = {
+ mean: number;
+ std: number;
+};
+
+/** Structure holding summary of statistical values */
+export type SummaryStats = MeanStdPair & {
+ min: number;
+ max: number;
+};
+
+/** Structure with raw performance metrics for each generation before any statistics are calculated. */
+export type RawMetrics = {
+ /** Durations for each generate call in milliseconds. */
+ generateDurations: number[];
+ /** Durations for the tokenization process in milliseconds. */
+ tokenizationDurations: number[];
+ /** Durations for the detokenization process in milliseconds. */
+ detokenizationDurations: number[];
+ /** Times to the first token for each call in milliseconds. */
+ timesToFirstToken: number[];
+ /** Timestamps of generation every token or batch of tokens in milliseconds. */
+ newTokenTimes: number[];
+ /** Inference time for each token in milliseconds. */
+ tokenInferDurations: number[];
+ /** Batch sizes for each generate call. */
+ batchSizes: number[];
+ /** Total durations for each generate call in milliseconds. */
+ durations: number[];
+ /** Total inference duration for each generate call in microseconds. */
+ inferenceDurations: number[];
+ /** Time to compile the grammar in milliseconds. */
+ grammarCompileTimes: number[];
+};
+
+/** Structure with raw performance metrics for VLM generation. */
+export type VLMRawMetrics = {
+ /** Durations for embedding preparation in milliseconds. */
+ prepareEmbeddingsDurations: number[];
+};
+
+/**
+ * Holds performance metrics for each generate call.
+ *
+ * PerfMetrics holds the following metrics with mean and standard deviations:
+ - Time To the First Token (TTFT), ms
+ - Time per Output Token (TPOT), ms/token
+ - Inference time per Output Token (IPOT), ms/token
+ - Generate total duration, ms
+ - Inference duration, ms
+ - Tokenization duration, ms
+ - Detokenization duration, ms
+ - Throughput, tokens/s
+ - Load time, ms
+ - Number of generated tokens
+ - Number of tokens in the input prompt
+ - Time to initialize grammar compiler for each backend, ms
+ - Time to compile grammar, ms
+ * Preferable way to access metrics is via getter methods. Getter methods calculate mean and std values from rawMetrics and return pairs.
+ * If mean and std were already calculated, getter methods return cached values.
+ */
+export interface PerfMetrics {
+ /** Returns the load time in milliseconds. */
+ getLoadTime(): number;
+ /** Returns the number of generated tokens. */
+ getNumGeneratedTokens(): number;
+ /** Returns the number of tokens in the input prompt. */
+ getNumInputTokens(): number;
+ /** Returns the mean and standard deviation of Time To the First Token (TTFT) in milliseconds. */
+ getTTFT(): MeanStdPair;
+ /** Returns the mean and standard deviation of Time Per Output Token (TPOT) in milliseconds. */
+ getTPOT(): MeanStdPair;
+ /** Returns the mean and standard deviation of Inference time Per Output Token in milliseconds. */
+ getIPOT(): MeanStdPair;
+ /** Returns the mean and standard deviation of throughput in tokens per second. */
+ getThroughput(): MeanStdPair;
+ /** Returns the mean and standard deviation of the time spent on model inference during generate call in milliseconds. */
+ getInferenceDuration(): MeanStdPair;
+ /** Returns the mean and standard deviation of generate durations in milliseconds. */
+ getGenerateDuration(): MeanStdPair;
+ /** Returns the mean and standard deviation of tokenization durations in milliseconds. */
+ getTokenizationDuration(): MeanStdPair;
+ /** Returns the mean and standard deviation of detokenization durations in milliseconds. */
+ getDetokenizationDuration(): MeanStdPair;
+ /** Returns a map with the time to initialize the grammar compiler for each backend in milliseconds. */
+ getGrammarCompilerInitTimes(): { [key: string]: number };
+ /** Returns the mean, standard deviation, min, and max of grammar compile times in milliseconds. */
+ getGrammarCompileTime(): SummaryStats;
+ /** A structure of RawPerfMetrics type that holds raw metrics. */
+ rawMetrics: RawMetrics;
+
+ /** Adds the metrics from another PerfMetrics object to this one.
+ * @returns The current PerfMetrics instance.
+ */
+ add(other: PerfMetrics): this;
+}
+
+/**
+ * Holds performance metrics for each VLM generate call.
+ *
+ * VLMPerfMetrics extends PerfMetrics with VLM-specific metrics:
+ * - Prepare embeddings duration, ms
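+ *
+ * Usage sketch (assumes `result` is the value resolved by `VLMPipeline.generate()`;
+ * the getters are the ones declared on PerfMetrics/VLMPerfMetrics in this file):
+ * @example
+ * const metrics = result.perfMetrics;
+ * console.log(`TTFT: ${metrics.getTTFT().mean.toFixed(2)} ms`);
+ * console.log(`Throughput: ${metrics.getThroughput().mean.toFixed(2)} tokens/s`);
+ * console.log(`Embeddings preparation: ${metrics.getPrepareEmbeddingsDuration().mean.toFixed(2)} ms`);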
+ */
+export interface VLMPerfMetrics extends PerfMetrics {
+ /** Returns the mean and standard deviation of embeddings preparation duration in milliseconds. */
+ getPrepareEmbeddingsDuration(): MeanStdPair;
+ /** VLM specific raw metrics */
+ vlmRawMetrics: VLMRawMetrics;
+
+ /** Adds the metrics from another VLMPerfMetrics object to this one.
+ * @returns The current VLMPerfMetrics instance.
+ */
+ add(other: VLMPerfMetrics): this;
+}
diff --git a/src/js/lib/pipelines/llmPipeline.ts b/src/js/lib/pipelines/llmPipeline.ts
index f05e654e5e..ce2a418436 100644
--- a/src/js/lib/pipelines/llmPipeline.ts
+++ b/src/js/lib/pipelines/llmPipeline.ts
@@ -1,6 +1,7 @@
import util from "node:util";
import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js";
import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js";
+import { DecodedResults } from "../decodedResults.js";
import { Tokenizer } from "../tokenizer.js";
export type ResolveFunction = (arg: { value: string; done: boolean }) => void;
@@ -9,131 +10,6 @@ export type Options = {
max_new_tokens?: number;
};
-/** Structure with raw performance metrics for each generation before any statistics are calculated. */
-export type RawMetrics = {
- /** Durations for each generate call in milliseconds. */
- generateDurations: number[];
- /** Durations for the tokenization process in milliseconds. */
- tokenizationDurations: number[];
- /** Durations for the detokenization process in milliseconds. */
- detokenizationDurations: number[];
- /** Times to the first token for each call in milliseconds. */
- timesToFirstToken: number[];
- /** Timestamps of generation every token or batch of tokens in milliseconds. */
- newTokenTimes: number[];
- /** Inference time for each token in milliseconds. */
- tokenInferDurations: number[];
- /** Batch sizes for each generate call. */
- batchSizes: number[];
- /** Total durations for each generate call in milliseconds. */
- durations: number[];
- /** Total inference duration for each generate call in microseconds. */
- inferenceDurations: number[];
- /** Time to compile the grammar in milliseconds. */
- grammarCompileTimes: number[];
-};
-
-/** Structure holding mean and standard deviation values. */
-export type MeanStdPair = {
- mean: number;
- std: number;
-};
-
-/** Structure holding summary of statistical values */
-export type SummaryStats = {
- mean: number;
- std: number;
- min: number;
- max: number;
-};
-
-/**
- * Holds performance metrics for each generate call.
- *
- * PerfMetrics holds the following metrics with mean and standard deviations:
- - Time To the First Token (TTFT), ms
- - Time per Output Token (TPOT), ms/token
- - Inference time per Output Token (IPOT), ms/token
- - Generate total duration, ms
- - Inference duration, ms
- - Tokenization duration, ms
- - Detokenization duration, ms
- - Throughput, tokens/s
- - Load time, ms
- - Number of generated tokens
- - Number of tokens in the input prompt
- - Time to initialize grammar compiler for each backend, ms
- - Time to compile grammar, ms
- * Preferable way to access metrics is via getter methods. Getter methods calculate mean and std values from rawMetrics and return pairs.
- * If mean and std were already calculated, getter methods return cached values.
- */
-export interface PerfMetrics {
- /** Returns the load time in milliseconds. */
- getLoadTime(): number;
- /** Returns the number of generated tokens. */
- getNumGeneratedTokens(): number;
- /** Returns the number of tokens in the input prompt. */
- getNumInputTokens(): number;
- /** Returns the mean and standard deviation of Time To the First Token (TTFT) in milliseconds. */
- getTTFT(): MeanStdPair;
- /** Returns the mean and standard deviation of Time Per Output Token (TPOT) in milliseconds. */
- getTPOT(): MeanStdPair;
- /** Returns the mean and standard deviation of Inference time Per Output Token in milliseconds. */
- getIPOT(): MeanStdPair;
- /** Returns the mean and standard deviation of throughput in tokens per second. */
- getThroughput(): MeanStdPair;
- /** Returns the mean and standard deviation of the time spent on model inference during generate call in milliseconds. */
- getInferenceDuration(): MeanStdPair;
- /** Returns the mean and standard deviation of generate durations in milliseconds. */
- getGenerateDuration(): MeanStdPair;
- /** Returns the mean and standard deviation of tokenization durations in milliseconds. */
- getTokenizationDuration(): MeanStdPair;
- /** Returns the mean and standard deviation of detokenization durations in milliseconds. */
- getDetokenizationDuration(): MeanStdPair;
- /** Returns a map with the time to initialize the grammar compiler for each backend in milliseconds. */
- getGrammarCompilerInitTimes(): { [key: string]: number };
- /** Returns the mean, standard deviation, min, and max of grammar compile times in milliseconds. */
- getGrammarCompileTime(): SummaryStats;
- /** A structure of RawPerfMetrics type that holds raw metrics. */
- rawMetrics: RawMetrics;
-
- /** Adds the metrics from another PerfMetrics object to this one.
- * @returns The current PerfMetrics instance.
- */
- add(other: PerfMetrics): this;
-}
-
-export class DecodedResults {
- constructor(texts: string[], scores: number[], perfMetrics: PerfMetrics) {
- this.texts = texts;
- this.scores = scores;
- this.perfMetrics = perfMetrics;
- }
- toString() {
- if (this.scores.length !== this.texts.length) {
- throw new Error("The number of scores and texts doesn't match in DecodedResults.");
- }
- if (this.texts.length === 0) {
- return "";
- }
- if (this.texts.length === 1) {
- return this.texts[0];
- }
- let result = "";
- for (let i = 0; i < this.texts.length - 1; ++i) {
- result += `${this.scores[i].toFixed(6)}: ${this.texts[i]}\n`;
- }
- result += `${this.scores[this.scores.length - 1].toFixed(
- 6,
- )}: ${this.texts[this.texts.length - 1]}`;
-
- return result;
- }
- texts: string[];
- scores: number[];
- perfMetrics: PerfMetrics;
-}
-
export class LLMPipeline {
modelPath: string;
device: string;
diff --git a/src/js/lib/pipelines/vlmPipeline.ts b/src/js/lib/pipelines/vlmPipeline.ts
new file mode 100644
index 0000000000..cdede8ee25
--- /dev/null
+++ b/src/js/lib/pipelines/vlmPipeline.ts
@@ -0,0 +1,225 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+import util from "node:util";
+import { VLMPipeline as VLMPipelineWrapper } from "../addon.js";
+import { GenerationConfig, VLMPipelineProperties, StreamingStatus } from "../utils.js";
+import { VLMDecodedResults } from "../decodedResults.js";
+import { Tokenizer } from "../tokenizer.js";
+import type { Tensor } from "openvino-node";
+import { VLMPerfMetrics } from "../perfMetrics.js";
+
+/**
+ * Options for VLM generation methods.
+ */
+export type VLMGenerateOptions = {
+ /** Array of image tensors to include in the prompt. */
+ images?: Tensor[];
+ /** Array of video frame tensors to include in the prompt. */
+ videos?: Tensor[];
+ /** Generation configuration parameters such as max_length, temperature, etc. */
+ generationConfig?: GenerationConfig;
+};
+
+/**
+ * This class is used for generation with Visual Language Models (VLMs)
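+ *
+ * Minimal usage sketch (assumes `modelPath` points to an exported OpenVINO VLM and
+ * `imageTensor` is an `openvino-node` Tensor; the async factory is re-exported from the
+ * package index as `VLMPipeline`):
+ * @example
+ * import { VLMPipeline } from "openvino-genai-node";
+ * const pipe = await VLMPipeline(modelPath, "CPU");
+ * const result = await pipe.generate("Describe the image.", { images: [imageTensor] });
+ * console.log(result.texts[0]);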
+ */
+export class VLMPipeline {
+ protected readonly modelPath: string;
+ protected readonly device: string;
+ protected pipeline: VLMPipelineWrapper | null = null;
+ protected readonly properties: VLMPipelineProperties;
+
+ /**
+ * Construct a VLM pipeline from a folder containing tokenizer and model IRs.
+ * @param modelPath - A folder to read tokenizer and model IRs.
+ * @param device - Inference device. A tokenizer is always compiled for CPU.
+ * @param properties - Device and pipeline properties.
+ */
+ constructor(modelPath: string, device: string, properties: VLMPipelineProperties) {
+ this.modelPath = modelPath;
+ this.device = device;
+ this.properties = properties;
+ }
+
+ /**
+ * Initialize the underlying native pipeline.
+ * @returns Resolves when initialization is complete.
+ */
+ async init() {
+ const pipeline = new VLMPipelineWrapper();
+
+ const initPromise = util.promisify(pipeline.init.bind(pipeline));
+ await initPromise(this.modelPath, this.device, this.properties);
+
+ this.pipeline = pipeline;
+ }
+ /**
+ * Start a chat session with an optional system message.
+ * @param systemMessage - Optional system message to initialize chat context.
+ * @returns Resolves when chat session is started.
+ */
+ async startChat(systemMessage: string = "") {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+
+ const startChatPromise = util.promisify(this.pipeline.startChat.bind(this.pipeline));
+ const result = await startChatPromise(systemMessage);
+
+ return result;
+ }
+ /**
+ * Finish the current chat session and clear chat-related state.
+ * @returns Resolves when chat session is finished.
+ */
+ async finishChat() {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+
+ const finishChatPromise = util.promisify(this.pipeline.finishChat.bind(this.pipeline));
+ const result = await finishChatPromise();
+
+ return result;
+ }
+ /**
+ * Stream generation results as an async iterator of strings.
+ * The iterator yields subword chunks.
+ * @param prompt - Input prompt. May contain image/video tags recognized by the model.
+ * @param options - Optional parameters.
+ * @param options.images - Array of image tensors to include in the prompt.
+ * @param options.videos - Array of video frame tensors to include in the prompt.
+ * @param options.generationConfig - Generation parameters.
+ * @returns Async iterator producing subword chunks.
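+ * @example
+ * // Illustrative sketch: `pipe` is an initialized VLMPipeline and `images` is a Tensor[].
+ * for await (const chunk of pipe.stream("Describe the image.", { images })) {
+ *   process.stdout.write(chunk);
+ * }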
+ */
+ stream(prompt: string, options: VLMGenerateOptions = {}): AsyncIterableIterator<string> {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+ const { images, videos, generationConfig } = options;
+
+ let streamingStatus: StreamingStatus = StreamingStatus.RUNNING;
+ const queue: { done: boolean; subword: string }[] = [];
+ type ResolveFunction = (arg: { value: string; done: boolean }) => void;
+ type RejectFunction = (reason?: unknown) => void;
+ let resolvePromise: ResolveFunction | null;
+ let rejectPromise: RejectFunction | null;
+
+ const callback = (
+ error: Error | null,
+ result: { texts: string[]; scores: number[]; perfMetrics: VLMPerfMetrics },
+ ) => {
+ if (error) {
+ if (rejectPromise) {
+ rejectPromise(error);
+ // Reset promises
+ resolvePromise = null;
+ rejectPromise = null;
+ } else {
+ throw error;
+ }
+ } else {
+ const decodedResult = new VLMDecodedResults(
+ result.texts,
+ result.scores,
+ result.perfMetrics,
+ );
+ const fullText = decodedResult.toString();
+ if (resolvePromise) {
+ // Fulfill pending request
+ resolvePromise({ done: true, value: fullText });
+ // Reset promises
+ resolvePromise = null;
+ rejectPromise = null;
+ } else {
+ // Add data to queue if no pending promise
+ queue.push({ done: true, subword: fullText });
+ }
+ }
+ };
+
+ const streamer = (chunk: string): StreamingStatus => {
+ if (resolvePromise) {
+ // Fulfill pending request
+ resolvePromise({ done: false, value: chunk });
+ // Reset promises
+ resolvePromise = null;
+ rejectPromise = null;
+ } else {
+ // Add data to queue if no pending promise
+ queue.push({ done: false, subword: chunk });
+ }
+ return streamingStatus;
+ };
+
+ this.pipeline.generate(prompt, images, videos, streamer, generationConfig, callback);
+
+ return {
+ async next() {
+ // If there is data in the queue, return it
+ // Otherwise, return a promise that will resolve when data is available
+ const data = queue.shift();
+
+ if (data) {
+ return { value: data.subword, done: data.done };
+ }
+
+ return new Promise((resolve: ResolveFunction, reject: (reason?: unknown) => void) => {
+ resolvePromise = resolve;
+ rejectPromise = reject;
+ });
+ },
+ async return() {
+ streamingStatus = StreamingStatus.CANCEL;
+
+ return { done: true, value: "" };
+ },
+ [Symbol.asyncIterator]() {
+ return this;
+ },
+ };
+ }
+ /**
+ * Generate sequences for VLMs.
+ * @param prompt - Input prompt. May contain model-specific image/video tags.
+ * @param options - Optional parameters.
+ * @param options.images - Images to include in the prompt.
+ * @param options.videos - Videos to include in the prompt.
+ * @param options.generationConfig - Generation configuration parameters.
+ * @param options.streamer - Optional streamer callback called for each chunk.
+ * @returns Resolves with decoded results once generation finishes.
+ */
+ async generate(
+ prompt: string,
+ options: VLMGenerateOptions & { streamer?: (chunk: string) => StreamingStatus } = {},
+ ): Promise<VLMDecodedResults> {
+ const { images, videos, generationConfig, streamer } = options;
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+ const innerGenerate = util.promisify(this.pipeline.generate.bind(this.pipeline));
+ const result = await innerGenerate(prompt, images, videos, streamer, generationConfig);
+
+ return new VLMDecodedResults(result.texts, result.scores, result.perfMetrics);
+ }
+
+ /**
+ * Get the pipeline tokenizer instance.
+ * @returns Tokenizer used by the pipeline.
+ */
+ getTokenizer(): Tokenizer {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+ return this.pipeline.getTokenizer();
+ }
+
+ /**
+ * Set the chat template used when formatting chat history and prompts.
+ * @param chatTemplate - Chat template string.
+ */
+ setChatTemplate(chatTemplate: string): void {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+ this.pipeline.setChatTemplate(chatTemplate);
+ }
+
+ /**
+ * Set generation configuration parameters.
+ * @param config - Generation configuration parameters.
+ */
+ setGenerationConfig(config: GenerationConfig): void {
+ if (!this.pipeline) throw new Error("Pipeline is not initialized");
+ this.pipeline.setGenerationConfig(config);
+ }
+}
diff --git a/src/js/lib/utils.ts b/src/js/lib/utils.ts
index 43684e9fa0..17211549e2 100644
--- a/src/js/lib/utils.ts
+++ b/src/js/lib/utils.ts
@@ -344,3 +344,7 @@ export type SchedulerConfig = {
export type LLMPipelineProperties = {
schedulerConfig?: SchedulerConfig;
};
+
+export type VLMPipelineProperties = {
+ schedulerConfig?: SchedulerConfig;
+} & Record<string, unknown>;
diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp
index 72cb3b6b16..80c8d800b1 100644
--- a/src/js/src/addon.cpp
+++ b/src/js/src/addon.cpp
@@ -5,6 +5,8 @@
#include "include/perf_metrics.hpp"
#include "include/llm_pipeline/llm_pipeline_wrapper.hpp"
+#include "include/vlm_pipeline/vlm_pipeline_wrapper.hpp"
+#include "include/vlm_pipeline/perf_metrics.hpp"
#include "include/text_embedding_pipeline/pipeline_wrapper.hpp"
#include "include/tokenizer.hpp"
#include "include/chat_history.hpp"
@@ -47,9 +49,11 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) {
env.SetInstanceData(addon_data);
init_class(env, exports, "LLMPipeline", &LLMPipelineWrapper::get_class, addon_data->core);
+ init_class(env, exports, "VLMPipeline", &VLMPipelineWrapper::get_class, addon_data->vlm_pipeline);
init_class(env, exports, "TextEmbeddingPipeline", &TextEmbeddingPipelineWrapper::get_class, addon_data->core);
init_class(env, exports, "Tokenizer", &TokenizerWrapper::get_class, addon_data->tokenizer);
init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics);
+ init_class(env, exports, "VLMPerfMetrics", &VLMPerfMetricsWrapper::get_class, addon_data->vlm_perf_metrics);
init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history);
// Expose a helper to set the openvino-node addon from JS (useful for ESM)
diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp
index 5414a7f522..523f09568d 100644
--- a/src/js/src/helper.cpp
+++ b/src/js/src/helper.cpp
@@ -3,6 +3,7 @@
#include "include/addon.hpp"
#include "include/chat_history.hpp"
#include "include/perf_metrics.hpp"
+#include "include/vlm_pipeline/perf_metrics.hpp"
namespace {
constexpr const char* JS_SCHEDULER_CONFIG_KEY = "schedulerConfig";
@@ -337,6 +338,25 @@ ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value)
return *tensor_ptr;
}
+template <>
+std::vector<ov::Tensor> js_to_cpp<std::vector<ov::Tensor>>(const Napi::Env& env, const Napi::Value& value) {
+ std::vector<ov::Tensor> tensors;
+ if (value.IsUndefined() || value.IsNull()) {
+ return tensors;
+ }
+ if (value.IsArray()) {
+ auto array = value.As<Napi::Array>();
+ size_t length = array.Length();
+ tensors.reserve(length);
+ for (uint32_t i = 0; i < length; ++i) {
+ tensors.push_back(js_to_cpp<ov::Tensor>(env, array[i]));
+ }
+ } else {
+ OPENVINO_THROW("Passed argument must be an array of Tensors.");
+ }
+ return tensors;
+}
+
template <>
ov::genai::PerfMetrics& unwrap<ov::genai::PerfMetrics>(const Napi::Env& env, const Napi::Value& value) {
 const auto obj = value.As<Napi::Object>();
@@ -350,6 +370,17 @@ ov::genai::PerfMetrics& unwrap(const Napi::Env& env, con
return js_metrics->get_value();
}
+template <>
+ov::genai::VLMPerfMetrics& unwrap<ov::genai::VLMPerfMetrics>(const Napi::Env& env, const Napi::Value& value) {
+ const auto obj = value.As<Napi::Object>();
+ const auto& prototype = env.GetInstanceData()->vlm_perf_metrics;
+ OPENVINO_ASSERT(prototype, "Invalid pointer to prototype.");
+ OPENVINO_ASSERT(obj.InstanceOf(prototype.Value().As<Napi::Function>()),
+ "Passed argument is not of type VLMPerfMetrics");
+ const auto js_metrics = Napi::ObjectWrap<VLMPerfMetricsWrapper>::Unwrap(obj);
+ return js_metrics->get_value();
+}
+
template <>
ov::genai::ChatHistory& unwrap<ov::genai::ChatHistory>(const Napi::Env& env, const Napi::Value& value) {
OPENVINO_ASSERT(value.IsObject(), "Passed argument must be an object.");
@@ -539,3 +570,20 @@ Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::stri
return ctor_val.As<Napi::Function>();
}
+
+Napi::Object to_decoded_result(const Napi::Env& env, const ov::genai::DecodedResults& results) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set("texts", cpp_to_js, Napi::Value>(env, results.texts));
+ obj.Set("scores", cpp_to_js, Napi::Value>(env, results.scores));
+ obj.Set("perfMetrics", PerfMetricsWrapper::wrap(env, results.perf_metrics));
+ obj.Set("subword", Napi::String::New(env, results));
+ return obj;
+}
+
+Napi::Object to_vlm_decoded_result(const Napi::Env& env, const ov::genai::VLMDecodedResults& results) {
+ Napi::Object obj = Napi::Object::New(env);
+ obj.Set("texts", cpp_to_js, Napi::Value>(env, results.texts));
+ obj.Set("scores", cpp_to_js, Napi::Value>(env, results.scores));
+ obj.Set("perfMetrics", VLMPerfMetricsWrapper::wrap(env, results.perf_metrics));
+ return obj;
+}
diff --git a/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp b/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp
index 6a78ad24f8..19327b1e60 100644
--- a/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp
+++ b/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp
@@ -22,15 +22,6 @@ struct TsfnContext {
std::shared_ptr options = nullptr;
};
-Napi::Object create_decoded_results_object(Napi::Env env, const ov::genai::DecodedResults& result) {
- Napi::Object obj = Napi::Object::New(env);
- obj.Set("texts", cpp_to_js, Napi::Value>(env, result.texts));
- obj.Set("scores", cpp_to_js, Napi::Value>(env, result.scores));
- obj.Set("perfMetrics", PerfMetricsWrapper::wrap(env, result.perf_metrics));
- obj.Set("subword", Napi::String::New(env, result));
- return obj;
-}
-
void performInferenceThread(TsfnContext* context) {
try {
ov::genai::GenerationConfig config;
@@ -89,7 +80,7 @@ void performInferenceThread(TsfnContext* context) {
}, context->inputs);
napi_status status = context->tsfn.BlockingCall([result](Napi::Env env, Napi::Function jsCallback) {
- jsCallback.Call({Napi::Boolean::New(env, true), create_decoded_results_object(env, result)});
+ jsCallback.Call({Napi::Boolean::New(env, true), to_decoded_result(env, result)});
});
if (status != napi_ok) {
diff --git a/src/js/src/perf_metrics.cpp b/src/js/src/perf_metrics.cpp
index 1dfdbb62bb..cfce4c24df 100644
--- a/src/js/src/perf_metrics.cpp
+++ b/src/js/src/perf_metrics.cpp
@@ -1,37 +1,16 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
#include "include/perf_metrics.hpp"
-#include "bindings_utils.hpp"
#include "include/addon.hpp"
-#include "include/helper.hpp"
-
-using ov::genai::common_bindings::utils::get_ms;
-using ov::genai::common_bindings::utils::timestamp_to_ms;
PerfMetricsWrapper::PerfMetricsWrapper(const Napi::CallbackInfo& info)
- : Napi::ObjectWrap<PerfMetricsWrapper>(info),
- _metrics{} {};
+ : BasePerfMetricsWrapper(info) {}
Napi::Function PerfMetricsWrapper::get_class(Napi::Env env) {
- return DefineClass(
- env,
- "PerfMetrics",
- {
- InstanceMethod("getLoadTime", &PerfMetricsWrapper::get_load_time),
- InstanceMethod("getNumGeneratedTokens", &PerfMetricsWrapper::get_num_generated_tokens),
- InstanceMethod("getNumInputTokens", &PerfMetricsWrapper::get_num_input_tokens),
- InstanceMethod("getTTFT", &PerfMetricsWrapper::get_ttft),
- InstanceMethod("getTPOT", &PerfMetricsWrapper::get_tpot),
- InstanceMethod("getIPOT", &PerfMetricsWrapper::get_ipot),
- InstanceMethod("getThroughput", &PerfMetricsWrapper::get_throughput),
- InstanceMethod("getInferenceDuration", &PerfMetricsWrapper::get_inference_duration),
- InstanceMethod("getGenerateDuration", &PerfMetricsWrapper::get_generate_duration),
- InstanceMethod("getTokenizationDuration", &PerfMetricsWrapper::get_tokenization_duration),
- InstanceMethod("getDetokenizationDuration", &PerfMetricsWrapper::get_detokenization_duration),
- InstanceMethod("getGrammarCompilerInitTimes", &PerfMetricsWrapper::get_grammar_compiler_init_times),
- InstanceMethod("getGrammarCompileTime", &PerfMetricsWrapper::get_grammar_compile_time),
- InstanceAccessor<&PerfMetricsWrapper::get_raw_metrics>("rawMetrics"),
- InstanceMethod("add", &PerfMetricsWrapper::add),
- });
+ auto properties = BasePerfMetricsWrapper::get_class_properties();
+ return DefineClass(env, "PerfMetrics", properties);
}
Napi::Object PerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::PerfMetrics& metrics) {
@@ -43,143 +22,6 @@ Napi::Object PerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::PerfMetric
return obj;
}
-Napi::Value PerfMetricsWrapper::get_load_time(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getLoadTime()");
- return Napi::Number::New(info.Env(), _metrics.get_load_time());
-}
-
-Napi::Value PerfMetricsWrapper::get_num_generated_tokens(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getNumGeneratedTokens()");
- return Napi::Number::New(info.Env(), _metrics.get_num_generated_tokens());
-}
-
-Napi::Value PerfMetricsWrapper::get_num_input_tokens(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getNumInputTokens()");
- return Napi::Number::New(info.Env(), _metrics.get_num_input_tokens());
-}
-
-Napi::Object create_mean_std_pair(Napi::Env env, const ov::genai::MeanStdPair& pair) {
- Napi::Object obj = Napi::Object::New(env);
- obj.Set("mean", Napi::Number::New(env, pair.mean));
- obj.Set("std", Napi::Number::New(env, pair.std));
- return obj;
-}
-
-Napi::Object create_summary_stats(Napi::Env env, const ov::genai::SummaryStats& stats) {
- Napi::Object obj = Napi::Object::New(env);
- obj.Set("mean", Napi::Number::New(env, stats.mean));
- obj.Set("std", Napi::Number::New(env, stats.std));
- obj.Set("min", Napi::Number::New(env, stats.min));
- obj.Set("max", Napi::Number::New(env, stats.max));
- return obj;
-}
-
-Napi::Value PerfMetricsWrapper::get_ttft(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getTTFT()");
- return create_mean_std_pair(info.Env(), _metrics.get_ttft());
-}
-
-Napi::Value PerfMetricsWrapper::get_tpot(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getTPOT()");
- return create_mean_std_pair(info.Env(), _metrics.get_tpot());
-}
-
-Napi::Value PerfMetricsWrapper::get_ipot(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getIPOT()");
- return create_mean_std_pair(info.Env(), _metrics.get_ipot());
-}
-
-Napi::Value PerfMetricsWrapper::get_throughput(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getThroughput()");
- return create_mean_std_pair(info.Env(), _metrics.get_throughput());
-}
-
-Napi::Value PerfMetricsWrapper::get_inference_duration(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getInferenceDuration()");
- return create_mean_std_pair(info.Env(), _metrics.get_inference_duration());
-}
-
-Napi::Value PerfMetricsWrapper::get_generate_duration(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getGenerateDuration()");
- return create_mean_std_pair(info.Env(), _metrics.get_generate_duration());
-}
-
-Napi::Value PerfMetricsWrapper::get_tokenization_duration(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getTokenizationDuration()");
- return create_mean_std_pair(info.Env(), _metrics.get_tokenization_duration());
-}
-
-Napi::Value PerfMetricsWrapper::get_grammar_compiler_init_times(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompilerInitTimes()");
- return cpp_map_to_js_object(info.Env(), _metrics.get_grammar_compiler_init_times());
-}
-
-Napi::Value PerfMetricsWrapper::get_grammar_compile_time(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompileTime()");
- return create_summary_stats(info.Env(), _metrics.get_grammar_compile_time());
-};
-
-Napi::Value PerfMetricsWrapper::get_detokenization_duration(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 0, "getDetokenizationDuration()");
- return create_mean_std_pair(info.Env(), _metrics.get_detokenization_duration());
-}
-
Napi::Value PerfMetricsWrapper::get_raw_metrics(const Napi::CallbackInfo& info) {
- Napi::Object obj = Napi::Object::New(info.Env());
- obj.Set("generateDurations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::generate_durations)));
- obj.Set("tokenizationDurations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::tokenization_durations)));
- obj.Set("detokenizationDurations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::detokenization_durations)));
-
- obj.Set("timesToFirstToken",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_times_to_first_token)));
- obj.Set("newTokenTimes",
- cpp_to_js<std::vector<double>, Napi::Value>(
- info.Env(),
- timestamp_to_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_new_token_times)));
- obj.Set("tokenInferDurations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_token_infer_durations)));
- obj.Set("batchSizes", cpp_to_js<std::vector<size_t>, Napi::Value>(info.Env(), _metrics.raw_metrics.m_batch_sizes));
- obj.Set("durations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_durations)));
- obj.Set("inferenceDurations",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_inference_durations)));
-
- obj.Set("grammarCompileTimes",
- cpp_to_js<std::vector<float>, Napi::Value>(
- info.Env(),
- get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_grammar_compile_times)));
-
- return obj;
-}
-
-Napi::Value PerfMetricsWrapper::add(const Napi::CallbackInfo& info) {
- VALIDATE_ARGS_COUNT(info, 1, "add()");
- const auto env = info.Env();
- try {
- _metrics += unwrap(env, info[0]);
- } catch (const std::exception& ex) {
- Napi::TypeError::New(env, ex.what()).ThrowAsJavaScriptException();
- }
- return info.This();
-}
-
-ov::genai::PerfMetrics& PerfMetricsWrapper::get_value() {
- return _metrics;
+ return BasePerfMetricsWrapper::get_raw_metrics(info);
}
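For context, the JS-visible surface of `PerfMetrics` is unchanged by this refactor; only the C++ implementation moves into `BasePerfMetricsWrapper`. A minimal sketch of reading those accessors from a generation result — the model path, the `generate()` options shape, and the result field names are assumptions, not something this diff guarantees:

```js
import { LLMPipeline } from "openvino-genai-node";

// Placeholder model path and options; assumes generate() resolves to a
// DecodedResults-like object carrying a perfMetrics field.
const pipe = await LLMPipeline("./model", "CPU");
const result = await pipe.generate("What is OpenVINO?", {
  generationConfig: { max_new_tokens: 20 },
});

const metrics = result.perfMetrics;
console.log(metrics.getLoadTime());                 // ms
console.log(metrics.getNumGeneratedTokens());       // number
console.log(metrics.getTTFT());                     // { mean, std } in ms
console.log(metrics.getThroughput().mean);          // tokens/s
console.log(metrics.rawMetrics.generateDurations);  // number[]
```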
diff --git a/src/js/src/vlm_pipeline/finish_chat_worker.cpp b/src/js/src/vlm_pipeline/finish_chat_worker.cpp
new file mode 100644
index 0000000000..764be64719
--- /dev/null
+++ b/src/js/src/vlm_pipeline/finish_chat_worker.cpp
@@ -0,0 +1,16 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "include/vlm_pipeline/finish_chat_worker.hpp"
+
+VLMFinishChatWorker::VLMFinishChatWorker(Function& callback, std::shared_ptr<ov::genai::VLMPipeline>& pipe)
+ : AsyncWorker(callback),
+ pipe(pipe) {};
+
+void VLMFinishChatWorker::Execute() {
+ this->pipe->finish_chat();
+};
+
+void VLMFinishChatWorker::OnOK() {
+ Callback().Call({Env().Null()});
+};
diff --git a/src/js/src/vlm_pipeline/init_worker.cpp b/src/js/src/vlm_pipeline/init_worker.cpp
new file mode 100644
index 0000000000..49e93608da
--- /dev/null
+++ b/src/js/src/vlm_pipeline/init_worker.cpp
@@ -0,0 +1,32 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "include/vlm_pipeline/init_worker.hpp"
+
+VLMInitWorker::VLMInitWorker(Function& callback,
+ std::shared_ptr& pipe,
+ std::shared_ptr is_initializing,
+ const std::string model_path,
+ const std::string device,
+ const ov::AnyMap properties)
+ : AsyncWorker(callback),
+ pipe(pipe),
+ is_initializing(is_initializing),
+ model_path(model_path),
+ device(device),
+ properties(properties) {};
+
+void VLMInitWorker::Execute() {
+ *this->is_initializing = true;
+ this->pipe = std::make_shared<ov::genai::VLMPipeline>(this->model_path, this->device, this->properties);
+};
+
+void VLMInitWorker::OnOK() {
+ *this->is_initializing = false;
+ Callback().Call({Env().Null()});
+};
+
+void VLMInitWorker::OnError(const Error& e) {
+ *this->is_initializing = false;
+ Callback().Call({Napi::Error::New(Env(), e.Message()).Value()});
+};
diff --git a/src/js/src/vlm_pipeline/perf_metrics.cpp b/src/js/src/vlm_pipeline/perf_metrics.cpp
new file mode 100644
index 0000000000..6e2a258df8
--- /dev/null
+++ b/src/js/src/vlm_pipeline/perf_metrics.cpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "include/vlm_pipeline/perf_metrics.hpp"
+
+#include "include/addon.hpp"
+#include "include/helper.hpp"
+
+using ov::genai::common_bindings::utils::get_ms;
+
+VLMPerfMetricsWrapper::VLMPerfMetricsWrapper(const Napi::CallbackInfo& info)
+ : BasePerfMetricsWrapper(info) {}
+
+Napi::Function VLMPerfMetricsWrapper::get_class(Napi::Env env) {
+ auto properties = BasePerfMetricsWrapper::get_class_properties();
+ properties.push_back(
+ InstanceMethod("getPrepareEmbeddingsDuration", &VLMPerfMetricsWrapper::get_prepare_embeddings_duration));
+ properties.push_back(InstanceAccessor<&VLMPerfMetricsWrapper::get_vlm_raw_metrics>("vlmRawMetrics"));
+ return DefineClass(env, "VLMPerfMetrics", properties);
+}
+
+Napi::Object VLMPerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::VLMPerfMetrics& metrics) {
+ const auto& prototype = env.GetInstanceData()->vlm_perf_metrics;
+ OPENVINO_ASSERT(prototype, "Invalid pointer to prototype.");
+ auto obj = prototype.New({});
+ const auto m_ptr = Napi::ObjectWrap<VLMPerfMetricsWrapper>::Unwrap(obj);
+ m_ptr->_metrics = metrics;
+ return obj;
+}
+
+Napi::Value VLMPerfMetricsWrapper::get_prepare_embeddings_duration(const Napi::CallbackInfo& info) {
+ VALIDATE_ARGS_COUNT(info, 0, "getPrepareEmbeddingsDuration()");
+ return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_prepare_embeddings_duration());
+}
+
+Napi::Value VLMPerfMetricsWrapper::get_raw_metrics(const Napi::CallbackInfo& info) {
+ return BasePerfMetricsWrapper::get_raw_metrics(info);
+}
+
+Napi::Value VLMPerfMetricsWrapper::get_vlm_raw_metrics(const Napi::CallbackInfo& info) {
+ Napi::Object obj = Napi::Object::New(info.Env());
+ obj.Set("prepareEmbeddingsDurations",
+ cpp_to_js<std::vector<float>, Napi::Value>(
+ info.Env(),
+ get_ms(_metrics.vlm_raw_metrics, &ov::genai::VLMRawPerfMetrics::prepare_embeddings_durations)));
+
+ return obj;
+}
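On the JS side these registrations surface as `result.perfMetrics` on VLM results. A small sketch of reading the VLM-specific additions; the pipeline construction, model path, and image tensor are placeholders, and the `generate()` options shape follows the docs example rather than this file:

```js
import { addon as ov } from "openvino-node";
import { VLMPipeline } from "openvino-genai-node";

// Placeholder 32x32 RGB tensor; any [H, W, C] u8 image tensor works here.
const image = new ov.Tensor(ov.element.u8, [32, 32, 3], new Uint8Array(32 * 32 * 3));

const pipe = await VLMPipeline("./vlm-model", "CPU");
const result = await pipe.generate("Describe the image.", {
  images: [image],
  generationConfig: { max_new_tokens: 20 },
});

const metrics = result.perfMetrics;
console.log(metrics.getNumGeneratedTokens());                  // inherited from PerfMetrics
console.log(metrics.getPrepareEmbeddingsDuration());           // { mean, std } in ms
console.log(metrics.vlmRawMetrics.prepareEmbeddingsDurations); // number[] in ms
```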
diff --git a/src/js/src/vlm_pipeline/start_chat_worker.cpp b/src/js/src/vlm_pipeline/start_chat_worker.cpp
new file mode 100644
index 0000000000..bbce8cf210
--- /dev/null
+++ b/src/js/src/vlm_pipeline/start_chat_worker.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "include/vlm_pipeline/start_chat_worker.hpp"
+
+VLMStartChatWorker::VLMStartChatWorker(Function& callback,
+ std::shared_ptr& pipe,
+ std::string system_message)
+ : AsyncWorker(callback),
+ pipe(pipe),
+ system_message(system_message) {};
+
+void VLMStartChatWorker::Execute() {
+ this->pipe->start_chat(this->system_message);
+};
+
+void VLMStartChatWorker::OnOK() {
+ Callback().Call({Env().Null()});
+};
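These two workers back `startChat()` and `finishChat()` on the JS side. A brief sketch of the intended chat flow; the model path and prompts are placeholders, and the options shape follows the docs example:

```js
import { VLMPipeline } from "openvino-genai-node";

const pipe = await VLMPipeline("./vlm-model", "CPU");

// Conversation history is kept between generate() calls until finishChat() clears it.
await pipe.startChat("You are an assistant named Tom.");
const reply = await pipe.generate("What is your name?", {
  generationConfig: { max_new_tokens: 20 },
});
console.log(reply.texts[0]);
await pipe.finishChat();
```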
diff --git a/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp b/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp
new file mode 100644
index 0000000000..cc1c14fbbc
--- /dev/null
+++ b/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp
@@ -0,0 +1,287 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "include/vlm_pipeline/vlm_pipeline_wrapper.hpp"
+
+#include <future>
+
+#include "include/addon.hpp"
+#include "include/helper.hpp"
+#include "include/tokenizer.hpp"
+#include "include/vlm_pipeline/finish_chat_worker.hpp"
+#include "include/vlm_pipeline/init_worker.hpp"
+#include "include/vlm_pipeline/perf_metrics.hpp"
+#include "include/vlm_pipeline/start_chat_worker.hpp"
+
+struct VLMTsfnContext {
+ VLMTsfnContext(std::string prompt, std::shared_ptr<bool> is_generating)
+ : prompt(prompt),
+ is_generating(is_generating) {};
+ ~VLMTsfnContext() {};
+
+ std::thread native_thread;
+ Napi::ThreadSafeFunction callback;
+ std::optional<Napi::ThreadSafeFunction> streamer;
+
+ std::string prompt;
+ std::vector<ov::Tensor> images;
+ std::vector<ov::Tensor> videos;
+ std::shared_ptr<bool> is_generating;
+ std::shared_ptr<ov::genai::VLMPipeline> pipe = nullptr;
+ std::shared_ptr<ov::AnyMap> generation_config = nullptr;
+};
+
+void vlmPerformInferenceThread(VLMTsfnContext* context) {
+ auto report_error = [context](const std::string& message) {
+ auto status = context->callback.BlockingCall([message](Napi::Env env, Napi::Function jsCallback) {
+ try {
+ jsCallback.Call(
+ {Napi::Error::New(env, "vlmPerformInferenceThread error. " + message).Value(), env.Null()});
+ } catch (std::exception& err) {
+ std::cerr << "The callback failed when attempting to return an error from vlmPerformInferenceThread. "
+ "Details:\n"
+ << err.what() << std::endl;
+ std::cerr << "Original error message:\n" << message << std::endl;
+ }
+ });
+ if (status != napi_ok) {
+ std::cerr << "The BlockingCall failed with status " << status
+ << " when trying to return an error from vlmPerformInferenceThread." << std::endl;
+ std::cerr << "Original error message:\n" << message << std::endl;
+ }
+ };
+ auto finalize = [context]() {
+ *context->is_generating = false;
+ context->callback.Release();
+ if (context->streamer.has_value()) {
+ context->streamer->Release();
+ }
+ };
+ try {
+ ov::genai::GenerationConfig config;
+ config.update_generation_config(*context->generation_config);
+
+ ov::genai::StreamerVariant streamer = std::monostate();
+ std::vector<std::string> streamer_exceptions;
+ if (context->streamer.has_value()) {
+ streamer = [context, &streamer_exceptions](std::string word) {
+ std::promise<ov::genai::StreamingStatus> resultPromise;
+ napi_status status = context->streamer->BlockingCall(
+ [word, &resultPromise, &streamer_exceptions](Napi::Env env, Napi::Function jsCallback) {
+ try {
+ auto callback_result = jsCallback.Call({Napi::String::New(env, word)});
+ if (callback_result.IsNumber()) {
+ resultPromise.set_value(static_cast<ov::genai::StreamingStatus>(
+ callback_result.As<Napi::Number>().Int32Value()));
+ } else {
+ resultPromise.set_value(ov::genai::StreamingStatus::RUNNING);
+ }
+ } catch (std::exception& err) {
+ streamer_exceptions.push_back(err.what());
+ resultPromise.set_value(ov::genai::StreamingStatus::CANCEL);
+ }
+ });
+
+ if (status != napi_ok) {
+ streamer_exceptions.push_back("The streamer callback BlockingCall failed with the status: " +
+ status);
+ return ov::genai::StreamingStatus::CANCEL;
+ }
+
+ return resultPromise.get_future().get();
+ };
+ }
+
+ ov::genai::VLMDecodedResults result;
+
+ result = context->pipe->generate(context->prompt, context->images, context->videos, config, streamer);
+
+ if (!streamer_exceptions.empty()) {
+ // If there were exceptions from the streamer, report them all as a single error and finish without result
+ std::string combined_error = "Streamer exceptions occurred:\n";
+ for (size_t i = 0; i < streamer_exceptions.size(); ++i) {
+ combined_error += "[" + std::to_string(i + 1) + "] " + streamer_exceptions[i] + "\n";
+ }
+ report_error(combined_error);
+ } else {
+ // If no exceptions from streamer, call the final callback with the result
+ napi_status status =
+ context->callback.BlockingCall([result, &report_error](Napi::Env env, Napi::Function jsCallback) {
+ try {
+ jsCallback.Call({
+ env.Null(), // Error should be null in normal case
+ to_vlm_decoded_result(env, result)  // Return VLMDecodedResults as the final result
+ });
+ } catch (std::exception& err) {
+ report_error("The final callback failed. Details:\n" + std::string(err.what()));
+ }
+ });
+
+ if (status != napi_ok) {
+ report_error("The final BlockingCall failed with status " + status);
+ }
+ }
+ finalize();
+ } catch (std::exception& e) {
+ report_error(e.what());
+ finalize();
+ }
+}
+
+VLMPipelineWrapper::VLMPipelineWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap<VLMPipelineWrapper>(info) {};
+
+Napi::Function VLMPipelineWrapper::get_class(Napi::Env env) {
+ return DefineClass(env,
+ "VLMPipeline",
+ {InstanceMethod("init", &VLMPipelineWrapper::init),
+ InstanceMethod("generate", &VLMPipelineWrapper::generate),
+ InstanceMethod("getTokenizer", &VLMPipelineWrapper::get_tokenizer),
+ InstanceMethod("startChat", &VLMPipelineWrapper::start_chat),
+ InstanceMethod("finishChat", &VLMPipelineWrapper::finish_chat),
+ InstanceMethod("setChatTemplate", &VLMPipelineWrapper::set_chat_template),
+ InstanceMethod("setGenerationConfig", &VLMPipelineWrapper::set_generation_config)});
+}
+
+Napi::Value VLMPipelineWrapper::init(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(!this->pipe, "Pipeline is already initialized");
+ OPENVINO_ASSERT(!*this->is_initializing, "Pipeline is already initializing");
+ VALIDATE_ARGS_COUNT(info, 4, "init()");
+ const std::string model_path = js_to_cpp<std::string>(env, info[0]);
+ const std::string device = js_to_cpp<std::string>(env, info[1]);
+ const auto& properties = js_to_cpp<ov::AnyMap>(env, info[2]);
+ OPENVINO_ASSERT(info[3].IsFunction(), "init callback is not a function");
+ Napi::Function callback = info[3].As<Napi::Function>();
+
+ VLMInitWorker* asyncWorker =
+ new VLMInitWorker(callback, this->pipe, this->is_initializing, model_path, device, properties);
+ asyncWorker->Queue();
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::generate(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ OPENVINO_ASSERT(!*this->is_generating, "Another generation is already in progress");
+ *this->is_generating = true;
+ VALIDATE_ARGS_COUNT(info, 6, "generate()");
+ VLMTsfnContext* context = nullptr;
+
+ // Arguments: prompt, images, videos, streamer, generationConfig, callback
+ auto prompt = js_to_cpp<std::string>(env, info[0]);
+ auto images = js_to_cpp<std::vector<ov::Tensor>>(env, info[1]);
+ auto videos = js_to_cpp<std::vector<ov::Tensor>>(env, info[2]);
+ OPENVINO_ASSERT(info[3].IsFunction() || info[3].IsUndefined(), "streamer is not a function");
+ auto streamer = info[3].As<Napi::Function>();
+ auto generation_config = js_to_cpp<ov::AnyMap>(env, info[4]);
+ OPENVINO_ASSERT(info[5].IsFunction(), "generate callback is not a function");
+ auto callback = info[5].As<Napi::Function>();
+
+ context = new VLMTsfnContext(prompt, this->is_generating);
+ context->images = std::move(images);
+ context->videos = std::move(videos);
+ context->pipe = this->pipe;
+ context->generation_config = std::make_shared<ov::AnyMap>(generation_config);
+
+ context->callback =
+ Napi::ThreadSafeFunction::New(env,
+ callback, // JavaScript function called asynchronously
+ "VLM_generate_callback", // Name
+ 0, // Unlimited queue
+ 1, // Only one thread will use this initially
+ [context, this](Napi::Env) { // Finalizer used to clean threads up
+ context->native_thread.join();
+ delete context;
+ });
+ if (!streamer.IsUndefined()) {
+ context->streamer = Napi::ThreadSafeFunction::New(env,
+ streamer, // JavaScript function called asynchronously
+ "VLM_generate_streamer", // Name
+ 0, // Unlimited queue
+ 1); // Only one thread will use this initially
+ }
+ context->native_thread = std::thread(vlmPerformInferenceThread, context);
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ *this->is_generating = false;
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::start_chat(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ VALIDATE_ARGS_COUNT(info, 2, "startChat()");
+ auto system_message = js_to_cpp<std::string>(env, info[0]);
+ OPENVINO_ASSERT(info[1].IsFunction(), "startChat callback is not a function");
+ auto callback = info[1].As<Napi::Function>();
+
+ VLMStartChatWorker* asyncWorker = new VLMStartChatWorker(callback, this->pipe, system_message);
+ asyncWorker->Queue();
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::finish_chat(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ VALIDATE_ARGS_COUNT(info, 1, "finishChat()");
+ OPENVINO_ASSERT(info[0].IsFunction(), "finishChat callback is not a function");
+ Napi::Function callback = info[0].As<Napi::Function>();
+
+ VLMFinishChatWorker* asyncWorker = new VLMFinishChatWorker(callback, this->pipe);
+ asyncWorker->Queue();
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::get_tokenizer(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ auto tokenizer = this->pipe->get_tokenizer();
+ return TokenizerWrapper::wrap(env, tokenizer);
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::set_chat_template(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ VALIDATE_ARGS_COUNT(info, 1, "setChatTemplate()");
+ auto chat_template = js_to_cpp<std::string>(env, info[0]);
+ this->pipe->set_chat_template(chat_template);
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
+
+Napi::Value VLMPipelineWrapper::set_generation_config(const Napi::CallbackInfo& info) {
+ auto env = info.Env();
+ try {
+ OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized");
+ VALIDATE_ARGS_COUNT(info, 1, "setGenerationConfig()");
+ auto config_map = js_to_cpp<ov::AnyMap>(env, info[0]);
+ ov::genai::GenerationConfig config;
+ config.update_generation_config(config_map);
+ this->pipe->set_generation_config(config);
+ } catch (const std::exception& ex) {
+ Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException();
+ }
+ return env.Undefined();
+}
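The native `generate()` above takes positional arguments (prompt, images, videos, streamer, generationConfig, callback) and reports completion once through a Node-style callback. A rough sketch of how a JS layer could promisify that surface; this is a hypothetical helper, not the shipped TypeScript wrapper:

```js
// Hypothetical promisified adapter over the native VLMPipelineWrapper.generate().
// The native callback is invoked once as (error, VLMDecodedResults).
function generateAsync(nativePipe, prompt, { images = [], videos = [], streamer, generationConfig = {} } = {}) {
  return new Promise((resolve, reject) => {
    nativePipe.generate(prompt, images, videos, streamer, generationConfig, (err, result) => {
      if (err) reject(err);
      else resolve(result);
    });
  });
}
```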
diff --git a/src/js/tests/models.js b/src/js/tests/models.js
index f3daf59986..5fef7cda40 100644
--- a/src/js/tests/models.js
+++ b/src/js/tests/models.js
@@ -2,4 +2,5 @@ export const models = {
LLM: "OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov",
InstructLLM: "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov",
Embedding: "OpenVINO/bge-base-en-v1.5-fp16-ov",
+ VLM: "OpenVINO/Qwen2-VL-7B-Instruct-int4-ov",
};
diff --git a/src/js/tests/utils.js b/src/js/tests/utils.js
index be886b4bcb..0416941d99 100644
--- a/src/js/tests/utils.js
+++ b/src/js/tests/utils.js
@@ -1,6 +1,7 @@
import { bootstrap } from "global-agent";
import { promises as fs } from "node:fs";
import { listFiles, downloadFile } from "@huggingface/hub";
+import { addon as ov } from "openvino-node";
const BASE_DIR = "./tests/models/";
@@ -45,3 +46,60 @@ async function saveFile(file, response) {
await fs.writeFile(file, Buffer.from(arrayBuffer));
}
+
+/**
+ * Creates a synthetic test image tensor with a gradient pattern.
+ *
+ * Generates a small RGB image filled with a gradient pattern for testing VLM pipelines.
+ * The red channel varies by height, green by width, and blue is constant.
+ *
+ * @param height - Height of the image in pixels. (default: 32)
+ * @param width - Width of the image in pixels. (default: 32)
+ * @returns An OpenVINO Tensor with shape [height, width, channels] and uint8 data type.
+ */
+export function createTestImageTensor(height = 32, width = 32) {
+ const channels = 3;
+ const data = new Uint8Array(height * width * channels);
+
+ // Fill with gradient pattern
+ for (let h = 0; h < height; h++) {
+ for (let w = 0; w < width; w++) {
+ const idx = (h * width + w) * channels;
+ data[idx] = h * 8; // R
+ data[idx + 1] = w * 8; // G
+ data[idx + 2] = 128; // B
+ }
+ }
+
+ return new ov.Tensor("u8", [height, width, channels], data);
+}
+
+/**
+ * Creates a synthetic test video tensor with multiple frames.
+ *
+ * Generates a video tensor with a synthetic pattern that varies across frames.
+ * Each frame has a slightly different color pattern to simulate temporal variation.
+ * Useful for testing VLM pipelines with video inputs.
+ *
+ * @param frames - Number of video frames to generate. (default: 4)
+ * @param height - Height of each frame in pixels. (default: 32)
+ * @param width - Width of each frame in pixels. (default: 32)
+ * @returns An OpenVINO Tensor with shape [frames, height, width, channels] and uint8 data type.
+ */
+export function createTestVideoTensor(frames = 4, height = 32, width = 32) {
+ const channels = 3;
+ const data = new Uint8Array(frames * height * width * channels);
+
+ for (let f = 0; f < frames; f++) {
+ for (let h = 0; h < height; h++) {
+ for (let w = 0; w < width; w++) {
+ const idx = (f * height * width + h * width + w) * channels;
+ data[idx] = (h + f * 10) % 256;
+ data[idx + 1] = (w + f * 10) % 256;
+ data[idx + 2] = 128;
+ }
+ }
+ }
+
+ return new ov.Tensor("u8", [frames, height, width, channels], data);
+}
diff --git a/src/js/tests/vlmPipeline.test.js b/src/js/tests/vlmPipeline.test.js
new file mode 100644
index 0000000000..8611b772e8
--- /dev/null
+++ b/src/js/tests/vlmPipeline.test.js
@@ -0,0 +1,153 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+import { Tokenizer, VLMPipeline, DecodedResults, VLMDecodedResults } from "../dist/index.js";
+
+import assert from "node:assert";
+import { describe, it, before } from "node:test";
+import { models } from "./models.js";
+import { createTestImageTensor, createTestVideoTensor } from "./utils.js";
+
+const MODEL_PATH = process.env.VLM_MODEL_PATH || `./tests/models/${models.VLM.split("/")[1]}`;
+
+// Skip tests on macOS due to insufficient memory
+describe("VLMPipeline", { skip: process.platform === "darwin" }, () => {
+ let pipeline, testImage1, testImage2, testVideo1, testVideo2;
+
+ before(async () => {
+ pipeline = await VLMPipeline(MODEL_PATH, "CPU");
+ pipeline.setGenerationConfig({ max_new_tokens: 10 });
+ testImage1 = createTestImageTensor();
+ testImage2 = createTestImageTensor(50, 50);
+ testVideo1 = createTestVideoTensor();
+ testVideo2 = createTestVideoTensor(6, 64, 64);
+ });
+
+ it("should generate text without images", async () => {
+ const result = await pipeline.generate("What is 2+2?");
+
+ assert.ok(result instanceof DecodedResults, "Result should be instance of DecodedResults");
+ assert.ok(
+ result instanceof VLMDecodedResults,
+ "Result should be instance of VLMDecodedResults",
+ );
+ assert.ok(result.texts.length > 0, "Should generate some output");
+ });
+
+ it("should generate text with images", async () => {
+ const result = await pipeline.generate("Compare these two images.", {
+ images: [testImage1, testImage2],
+ });
+
+ assert.strictEqual(result.texts.length, 1, "Should generate comparison");
+ });
+
+ it("should generate text with video input", async () => {
+ const result = await pipeline.generate("Describe what happens in this video.", {
+ videos: [testVideo1],
+ generationConfig: {
+ max_new_tokens: 20,
+ temperature: 0,
+ },
+ });
+
+ assert.strictEqual(result.texts.length, 1);
+ });
+
+ it("should generate with both image and video", async () => {
+ const result = await pipeline.generate("Compare the image and video.", {
+ images: [testImage1],
+ videos: [testVideo2],
+ generationConfig: { max_new_tokens: 20, temperature: 0 },
+ });
+
+ assert.strictEqual(result.texts.length, 1);
+ });
+
+ it("throw error on invalid streamer", async () => {
+ await assert.rejects(
+ pipeline.generate("What is 2+2?", {
+ streamer: () => {
+ throw new Error("Test error");
+ },
+ }),
+ /Test error/,
+ );
+ });
+
+ it("throw error with invalid generationConfig", async () => {
+ await assert.rejects(
+ pipeline.generate("What is 2+2?", {
+ generationConfig: { max_new_tokens: "five" },
+ }),
+ /vlmPerformInferenceThread error/,
+ );
+ });
+
+ it("should support streaming generation", async () => {
+ const chunks = [];
+
+ const stream = pipeline.stream("What do you see?", {
+ images: [testImage1],
+ generationConfig: {
+ max_new_tokens: 15,
+ temperature: 0,
+ },
+ });
+
+ for await (const chunk of stream) {
+ chunks.push(chunk);
+ }
+
+ assert.ok(chunks.length > 0, "Should receive streaming chunks");
+ const fullOutput = chunks.join("");
+ assert.ok(fullOutput.length > 0, "Combined chunks should form output");
+ });
+
+ it("should return VLMDecodedResults with perfMetrics", async () => {
+ const result = await pipeline.generate("Describe the image.", {
+ images: [testImage2],
+ generationConfig: {
+ max_new_tokens: 10,
+ temperature: 0,
+ },
+ });
+
+ assert.ok(result, "Should return result");
+ assert.ok(result.perfMetrics, "Should have perfMetrics");
+ // Property from base PerformanceMetrics
+ const numTokens = result.perfMetrics.getNumGeneratedTokens();
+ assert.ok(typeof numTokens === "number", "getNumGeneratedTokens should return number");
+ assert.ok(
+ 0 < numTokens && numTokens <= 10,
+ "Number of tokens should be between 0 and max_new_tokens",
+ );
+ // VLM-specific properties
+ const prepareEmbeddings = result.perfMetrics.getPrepareEmbeddingsDuration();
+ assert.ok(
+ typeof prepareEmbeddings.mean === "number",
+ "PrepareEmbeddingsDuration should have mean",
+ );
+ const { prepareEmbeddingsDurations } = result.perfMetrics.vlmRawMetrics;
+ assert.ok(
+ Array.isArray(prepareEmbeddingsDurations),
+ "Should have duration of preparation of embeddings",
+ );
+ assert.ok(prepareEmbeddingsDurations.length > 0, "Should have at least one duration value");
+ });
+
+ it("should get tokenizer from pipeline", () => {
+ const tokenizer = pipeline.getTokenizer();
+ assert.ok(tokenizer instanceof Tokenizer, "Should return tokenizer");
+ });
+
+ it("should start and finish chat", async () => {
+ await pipeline.startChat("You are an assistant named Tom.");
+ const result1 = await pipeline.generate("What is your name?");
+ assert.ok(/Tom/.test(result1.toString()));
+
+ await pipeline.finishChat();
+ const result2 = await pipeline.generate("What is your name?");
+ assert.ok(!/Tom/.test(result2.toString()));
+ });
+});