diff --git a/site/docs/bindings/node-js.md b/site/docs/bindings/node-js.md index 7abaef3504..ece443fbaf 100644 --- a/site/docs/bindings/node-js.md +++ b/site/docs/bindings/node-js.md @@ -9,7 +9,7 @@ description: Node.js bindings provide JavaScript/TypeScript API. OpenVINO GenAI provides Node.js bindings that enable you to use generative AI pipelines in JavaScript and TypeScript applications. :::warning API Coverage -Node.js bindings currently provide a subset of the full OpenVINO GenAI API available in C++ and Python. The focus is on core text generation (`LLMPipeline`) and text embedding (`TextEmbeddingPipeline`) functionality. +Node.js bindings currently provide a subset of the full OpenVINO GenAI API available in C++ and Python. The focus is on core text generation (`LLMPipeline`), vision language models (`VLMPipeline`), and text embedding (`TextEmbeddingPipeline`) functionality. ::: ## Supported Pipelines and Features @@ -23,6 +23,10 @@ Node.js bindings currently support: - Multiple sampling strategies (greedy, beam search) - Structured output - ReAct agent support +- `VLMPipeline`: Vision Language Model inference for multimodal tasks + - Process images and videos with text prompts + - Chat mode with conversation history + - Streaming support - `TextEmbeddingPipeline`: Generate text embeddings for semantic search and RAG applications - `Tokenizer`: Fast tokenization / detokenization and chat prompt formatting - Encode strings into token id and attention mask tensors diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx new file mode 100644 index 0000000000..f2a495c1e7 --- /dev/null +++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx @@ -0,0 +1,40 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import { addon as ov } from "openvino-node"; +import { VLMPipeline } from "openvino-genai-node"; +import { stat, readdir } from "node:fs/promises"; +import sharp from "sharp"; +import path from "node:path"; + +async function readImage(imagePath) { + const img = sharp(imagePath); + const metadata = await img.metadata(); + const { width, height, channels } = metadata; + const imageBuffer = await img.raw().toBuffer(); + return new ov.Tensor(ov.element.u8, [height, width, channels], imageBuffer); +} + +async function readImages(imagePath) { + const stats = await stat(imagePath); + if (stats.isDirectory()) { + const files = await readdir(imagePath); + return Promise.all(files.sort().map((file) => readImage(path.join(imagePath, file)))); + } + return [await readImage(imagePath)]; +} + +const images = await readImages("./images"); + +const pipe = await VLMPipeline(modelPath, "${props.device || 'CPU'}"); + +const result = await pipe.generate(prompt, { + images, + generationConfig: { max_new_tokens: 100 }, +}); +console.log(result.texts[0]); + +// To input videos frames, use 'videos' option, frames tensor layout = [Frame, H, W, C] +// const result = await pipe.generate(prompt, { videos: [frames], generationConfig: { max_new_tokens: 100 } }); +`} + diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx index b5082eb1ef..284193977c 100644 --- a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx +++ b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx @@ -1,5 +1,6 @@ import CodeExampleCPP from './_code_example_cpp.mdx'; import 
CodeExamplePython from './_code_example_python.mdx'; +import CodeExampleJS from './_code_example_js.mdx'; ## Run Model Using OpenVINO GenAI @@ -27,6 +28,16 @@ It can generate text from a text prompt and images as inputs. + + + + + + + + + + :::tip diff --git a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx index 6ef41e98f1..b953df27bf 100644 --- a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx +++ b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx @@ -81,6 +81,28 @@ Similar to [text generation](/docs/use-cases/text-generation/#use-different-gene } ``` + + ```javascript + import { VLMPipeline } from 'openvino-genai-node'; + + const pipe = await VLMPipeline(modelPath, "CPU", {}); + + // Create custom generation configuration + const config = { + max_new_tokens: 100, + temperature: 0.7, + top_k: 50, + top_p: 0.9, + repetition_penalty: 1.2 + }; + + // Generate text with custom configuration + const output = await pipe.generate(prompt, { + images: images, + generationConfig: config + }); + ``` + diff --git a/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx b/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx index 50786c9ff3..010150c5a2 100644 --- a/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx +++ b/site/src/pages/_sections/UseCasesSection/components/image-processing.tsx @@ -1,10 +1,11 @@ import Button from '@site/src/components/Button'; -import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs'; +import { LanguageTabs, TabItemCpp, TabItemPython, TabItemJS } from '@site/src/components/LanguageTabs'; import UseCaseCard from './UseCaseCard'; import CodeExampleCpp from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx'; import CodeExamplePython from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx'; +import CodeExampleJS from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_js.mdx'; export const ImageProcessing = () => ( @@ -27,6 +28,9 @@ export const ImageProcessing = () => ( + + + diff --git a/src/js/include/addon.hpp b/src/js/include/addon.hpp index 28371ba822..c9b89cc610 100644 --- a/src/js/include/addon.hpp +++ b/src/js/include/addon.hpp @@ -9,8 +9,10 @@ typedef Napi::Function (*Prototype)(Napi::Env); struct AddonData { Napi::FunctionReference core; + Napi::FunctionReference vlm_pipeline; Napi::FunctionReference tokenizer; Napi::FunctionReference perf_metrics; + Napi::FunctionReference vlm_perf_metrics; Napi::FunctionReference chat_history; Napi::ObjectReference openvino_addon; }; diff --git a/src/js/include/base/perf_metrics.hpp b/src/js/include/base/perf_metrics.hpp new file mode 100644 index 0000000000..8d85266e14 --- /dev/null +++ b/src/js/include/base/perf_metrics.hpp @@ -0,0 +1,261 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "bindings_utils.hpp" +#include "include/helper.hpp" +#include "openvino/genai/perf_metrics.hpp" + +using ov::genai::common_bindings::utils::get_ms; +using ov::genai::common_bindings::utils::timestamp_to_ms; + +namespace perf_utils { + +inline Napi::Object create_mean_std_pair(Napi::Env env, const ov::genai::MeanStdPair& pair) { + Napi::Object obj = Napi::Object::New(env); + obj.Set("mean", Napi::Number::New(env, pair.mean)); + obj.Set("std", 
Napi::Number::New(env, pair.std)); + return obj; +} + +inline Napi::Object create_summary_stats(Napi::Env env, const ov::genai::SummaryStats& stats) { + Napi::Object obj = Napi::Object::New(env); + obj.Set("mean", Napi::Number::New(env, stats.mean)); + obj.Set("std", Napi::Number::New(env, stats.std)); + obj.Set("min", Napi::Number::New(env, stats.min)); + obj.Set("max", Napi::Number::New(env, stats.max)); + return obj; +} + +} // namespace perf_utils + +/** + * @brief Base template class for PerfMetrics wrappers. + * + * This class provides common functionality for wrapping ov::genai::PerfMetrics + * and derived classes (like VLMPerfMetrics) in Node.js addon. + * + * @tparam T The derived wrapper class (CRTP pattern). + * @tparam MetricsType The type of metrics to store (default: ov::genai::PerfMetrics). + */ +template +class BasePerfMetricsWrapper : public Napi::ObjectWrap { +public: + using PropertyDescriptor = typename Napi::ObjectWrap::PropertyDescriptor; + + BasePerfMetricsWrapper(const Napi::CallbackInfo& info); + virtual ~BasePerfMetricsWrapper() {} + + /** + * @brief Returns a vector of base class property descriptors. + * + * Derived classes can use this to get all base methods and add their own. + */ + static std::vector get_class_properties(); + + Napi::Value get_load_time(const Napi::CallbackInfo& info); + Napi::Value get_num_generated_tokens(const Napi::CallbackInfo& info); + Napi::Value get_num_input_tokens(const Napi::CallbackInfo& info); + Napi::Value get_ttft(const Napi::CallbackInfo& info); + Napi::Value get_tpot(const Napi::CallbackInfo& info); + Napi::Value get_ipot(const Napi::CallbackInfo& info); + Napi::Value get_throughput(const Napi::CallbackInfo& info); + + Napi::Value get_inference_duration(const Napi::CallbackInfo& info); + Napi::Value get_generate_duration(const Napi::CallbackInfo& info); + Napi::Value get_tokenization_duration(const Napi::CallbackInfo& info); + Napi::Value get_detokenization_duration(const Napi::CallbackInfo& info); + + Napi::Value get_grammar_compiler_init_times(const Napi::CallbackInfo& info); + Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info); + + /** + * @brief Base implementation of get_raw_metrics. + * + * Derived classes MUST override this method to use it with InstanceAccessor. 
+ * Example: + * + * Napi::Value get_raw_metrics(const Napi::CallbackInfo& info) { + * return BasePerfMetricsWrapper::get_raw_metrics(info); + * } + */ + Napi::Value get_raw_metrics(const Napi::CallbackInfo& info); + Napi::Value add(const Napi::CallbackInfo& info); + MetricsType& get_value(); + +protected: + MetricsType _metrics; +}; + +// Template implementations + +template +BasePerfMetricsWrapper::BasePerfMetricsWrapper(const Napi::CallbackInfo& info) + : Napi::ObjectWrap(info), + _metrics{} {} + +template +std::vector::PropertyDescriptor> +BasePerfMetricsWrapper::get_class_properties() { + return { + T::InstanceMethod("getLoadTime", &T::get_load_time), + T::InstanceMethod("getNumGeneratedTokens", &T::get_num_generated_tokens), + T::InstanceMethod("getNumInputTokens", &T::get_num_input_tokens), + T::InstanceMethod("getTTFT", &T::get_ttft), + T::InstanceMethod("getTPOT", &T::get_tpot), + T::InstanceMethod("getIPOT", &T::get_ipot), + T::InstanceMethod("getThroughput", &T::get_throughput), + T::InstanceMethod("getInferenceDuration", &T::get_inference_duration), + T::InstanceMethod("getGenerateDuration", &T::get_generate_duration), + T::InstanceMethod("getTokenizationDuration", &T::get_tokenization_duration), + T::InstanceMethod("getDetokenizationDuration", &T::get_detokenization_duration), + T::InstanceMethod("getGrammarCompilerInitTimes", &T::get_grammar_compiler_init_times), + T::InstanceMethod("getGrammarCompileTime", &T::get_grammar_compile_time), + T::template InstanceAccessor<&T::get_raw_metrics>("rawMetrics"), + T::InstanceMethod("add", &T::add), + }; +} + +template +Napi::Value BasePerfMetricsWrapper::get_load_time(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getLoadTime()"); + return Napi::Number::New(info.Env(), _metrics.get_load_time()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_num_generated_tokens(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getNumGeneratedTokens()"); + return Napi::Number::New(info.Env(), _metrics.get_num_generated_tokens()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_num_input_tokens(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getNumInputTokens()"); + return Napi::Number::New(info.Env(), _metrics.get_num_input_tokens()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_ttft(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getTTFT()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_ttft()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_tpot(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getTPOT()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_tpot()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_ipot(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getIPOT()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_ipot()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_throughput(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getThroughput()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_throughput()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_inference_duration(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getInferenceDuration()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_inference_duration()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_generate_duration(const Napi::CallbackInfo& info) { + 
VALIDATE_ARGS_COUNT(info, 0, "getGenerateDuration()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_generate_duration()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_tokenization_duration(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getTokenizationDuration()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_tokenization_duration()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_detokenization_duration(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getDetokenizationDuration()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_detokenization_duration()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_grammar_compiler_init_times(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompilerInitTimes()"); + return cpp_map_to_js_object(info.Env(), _metrics.get_grammar_compiler_init_times()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_grammar_compile_time(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompileTime()"); + return perf_utils::create_summary_stats(info.Env(), _metrics.get_grammar_compile_time()); +} + +template +Napi::Value BasePerfMetricsWrapper::get_raw_metrics(const Napi::CallbackInfo& info) { + Napi::Object obj = Napi::Object::New(info.Env()); + obj.Set("generateDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::generate_durations))); + obj.Set("tokenizationDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::tokenization_durations))); + obj.Set("detokenizationDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::detokenization_durations))); + + obj.Set("timesToFirstToken", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_times_to_first_token))); + obj.Set("newTokenTimes", + cpp_to_js, Napi::Value>( + info.Env(), + timestamp_to_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_new_token_times))); + obj.Set("tokenInferDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_token_infer_durations))); + obj.Set("batchSizes", cpp_to_js, Napi::Value>(info.Env(), _metrics.raw_metrics.m_batch_sizes)); + obj.Set("durations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_durations))); + obj.Set("inferenceDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_inference_durations))); + + obj.Set("grammarCompileTimes", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_grammar_compile_times))); + + return obj; +} + +template +Napi::Value BasePerfMetricsWrapper::add(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 1, "add()"); + const auto env = info.Env(); + try { + _metrics += unwrap(env, info[0]); + } catch (const std::exception& ex) { + Napi::TypeError::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return info.This(); +} + +template +MetricsType& BasePerfMetricsWrapper::get_value() { + return _metrics; +} diff --git a/src/js/include/helper.hpp b/src/js/include/helper.hpp index 55370d91e3..cccfcc5281 100644 --- a/src/js/include/helper.hpp +++ b/src/js/include/helper.hpp @@ -1,9 +1,10 @@ #pragma once #include +#include 
"openvino/core/type/element_type.hpp" #include "openvino/genai/llm_pipeline.hpp" #include "openvino/genai/rag/text_embedding_pipeline.hpp" -#include "openvino/core/type/element_type.hpp" +#include "openvino/genai/visual_language/pipeline.hpp" #include "openvino/openvino.hpp" template struct overloaded : Ts... {using Ts::operator()...;}; @@ -64,6 +65,8 @@ template <> ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp(const Napi::Env& env, const Napi::Value& value); template <> ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value); +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value); /** * @brief Unwraps a C++ object from a JavaScript wrapper. * @tparam TargetType The C++ class type to extract. @@ -75,6 +78,9 @@ TargetType& unwrap(const Napi::Env& env, const Napi::Value& value); template <> ov::genai::PerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value); +template <> +ov::genai::VLMPerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value); + /** * @brief Template function to convert C++ data types into Javascript data types * @tparam TargetType Destinated Javascript data type. @@ -144,3 +150,7 @@ std::string json_stringify(const Napi::Env& env, const Napi::Value& value); Napi::Value json_parse(const Napi::Env& env, const std::string& value); Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name); + +Napi::Object to_decoded_result(const Napi::Env& env, const ov::genai::DecodedResults& results); + +Napi::Object to_vlm_decoded_result(const Napi::Env& env, const ov::genai::VLMDecodedResults& results); diff --git a/src/js/include/perf_metrics.hpp b/src/js/include/perf_metrics.hpp index dd2aa7f587..fc1ddfbb7b 100644 --- a/src/js/include/perf_metrics.hpp +++ b/src/js/include/perf_metrics.hpp @@ -1,36 +1,19 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + #pragma once #include +#include "include/base/perf_metrics.hpp" #include "openvino/genai/perf_metrics.hpp" -class PerfMetricsWrapper : public Napi::ObjectWrap { +class PerfMetricsWrapper : public BasePerfMetricsWrapper { public: PerfMetricsWrapper(const Napi::CallbackInfo& info); static Napi::Function get_class(Napi::Env env); static Napi::Object wrap(Napi::Env env, const ov::genai::PerfMetrics& metrics); - Napi::Value get_load_time(const Napi::CallbackInfo& info); - Napi::Value get_num_generated_tokens(const Napi::CallbackInfo& info); - Napi::Value get_num_input_tokens(const Napi::CallbackInfo& info); - Napi::Value get_ttft(const Napi::CallbackInfo& info); - Napi::Value get_tpot(const Napi::CallbackInfo& info); - Napi::Value get_ipot(const Napi::CallbackInfo& info); - Napi::Value get_throughput(const Napi::CallbackInfo& info); - - Napi::Value get_inference_duration(const Napi::CallbackInfo& info); - Napi::Value get_generate_duration(const Napi::CallbackInfo& info); - Napi::Value get_tokenization_duration(const Napi::CallbackInfo& info); - Napi::Value get_detokenization_duration(const Napi::CallbackInfo& info); - - Napi::Value get_grammar_compiler_init_times(const Napi::CallbackInfo& info); - Napi::Value get_grammar_compile_time(const Napi::CallbackInfo& info); - Napi::Value get_raw_metrics(const Napi::CallbackInfo& info); - Napi::Value add(const Napi::CallbackInfo& info); - ov::genai::PerfMetrics& get_value(); - -private: - ov::genai::PerfMetrics _metrics; }; diff --git a/src/js/include/vlm_pipeline/finish_chat_worker.hpp b/src/js/include/vlm_pipeline/finish_chat_worker.hpp new file mode 
100644 index 0000000000..1c8c621e91 --- /dev/null +++ b/src/js/include/vlm_pipeline/finish_chat_worker.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "openvino/genai/visual_language/pipeline.hpp" + +using namespace Napi; + +class VLMFinishChatWorker : public AsyncWorker { +public: + VLMFinishChatWorker(Function& callback, std::shared_ptr& pipe); + virtual ~VLMFinishChatWorker() {} + + void Execute() override; + void OnOK() override; + +private: + std::shared_ptr& pipe; +}; diff --git a/src/js/include/vlm_pipeline/init_worker.hpp b/src/js/include/vlm_pipeline/init_worker.hpp new file mode 100644 index 0000000000..17ca8a794e --- /dev/null +++ b/src/js/include/vlm_pipeline/init_worker.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "openvino/genai/visual_language/pipeline.hpp" + +using namespace Napi; + +class VLMInitWorker : public AsyncWorker { +public: + VLMInitWorker(Function& callback, + std::shared_ptr& pipe, + std::shared_ptr is_initializing, + const std::string model_path, + std::string device, + ov::AnyMap properties); + virtual ~VLMInitWorker() {} + + void Execute() override; + void OnOK() override; + void OnError(const Error& e) override; + +private: + std::shared_ptr& pipe; + std::shared_ptr is_initializing; + std::string model_path; + std::string device; + ov::AnyMap properties; +}; diff --git a/src/js/include/vlm_pipeline/perf_metrics.hpp b/src/js/include/vlm_pipeline/perf_metrics.hpp new file mode 100644 index 0000000000..4333b159c1 --- /dev/null +++ b/src/js/include/vlm_pipeline/perf_metrics.hpp @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "include/base/perf_metrics.hpp" +#include "openvino/genai/visual_language/perf_metrics.hpp" + +class VLMPerfMetricsWrapper : public BasePerfMetricsWrapper { +public: + VLMPerfMetricsWrapper(const Napi::CallbackInfo& info); + + static Napi::Function get_class(Napi::Env env); + static Napi::Object wrap(Napi::Env env, const ov::genai::VLMPerfMetrics& metrics); + + Napi::Value get_prepare_embeddings_duration(const Napi::CallbackInfo& info); + Napi::Value get_raw_metrics(const Napi::CallbackInfo& info); + Napi::Value get_vlm_raw_metrics(const Napi::CallbackInfo& info); +}; diff --git a/src/js/include/vlm_pipeline/start_chat_worker.hpp b/src/js/include/vlm_pipeline/start_chat_worker.hpp new file mode 100644 index 0000000000..cb7ce0ae8d --- /dev/null +++ b/src/js/include/vlm_pipeline/start_chat_worker.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +#include "openvino/genai/visual_language/pipeline.hpp" + +using namespace Napi; + +class VLMStartChatWorker : public AsyncWorker { +public: + VLMStartChatWorker(Function& callback, std::shared_ptr& pipe, std::string system_message); + virtual ~VLMStartChatWorker() {} + + void Execute() override; + void OnOK() override; + +private: + std::shared_ptr& pipe; + std::string system_message; +}; diff --git a/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp b/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp new file mode 100644 index 0000000000..b7a34ab193 --- /dev/null +++ b/src/js/include/vlm_pipeline/vlm_pipeline_wrapper.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 + +#pragma once + +#include + +#include + +#include "openvino/genai/visual_language/pipeline.hpp" + +class VLMPipelineWrapper : public Napi::ObjectWrap { +public: + VLMPipelineWrapper(const Napi::CallbackInfo& info); + + static Napi::Function get_class(Napi::Env env); + + Napi::Value init(const Napi::CallbackInfo& info); + Napi::Value generate(const Napi::CallbackInfo& info); + Napi::Value start_chat(const Napi::CallbackInfo& info); + Napi::Value finish_chat(const Napi::CallbackInfo& info); + Napi::Value get_tokenizer(const Napi::CallbackInfo& info); + Napi::Value set_chat_template(const Napi::CallbackInfo& info); + Napi::Value set_generation_config(const Napi::CallbackInfo& info); + +private: + std::shared_ptr pipe = nullptr; + std::shared_ptr is_initializing = std::make_shared(false); + std::shared_ptr is_generating = std::make_shared(false); +}; diff --git a/src/js/lib/addon.ts b/src/js/lib/addon.ts index b6023e5a09..188af9324f 100644 --- a/src/js/lib/addon.ts +++ b/src/js/lib/addon.ts @@ -1,9 +1,12 @@ import { createRequire } from "module"; import { platform } from "node:os"; import { join, dirname, resolve } from "node:path"; +import { Tensor } from "openvino-node"; import type { ChatHistory as IChatHistory } from "./chatHistory.js"; import type { Tokenizer as ITokenizer } from "./tokenizer.js"; import { addon as ovAddon } from "openvino-node"; +import { GenerationConfig, StreamingStatus, VLMPipelineProperties } from "./utils.js"; +import { VLMPerfMetrics } from "./perfMetrics.js"; export type EmbeddingResult = Float32Array | Int8Array | Uint8Array; export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[]; @@ -58,9 +61,36 @@ export interface TextEmbeddingPipelineWrapper { embedDocumentsSync(documents: string[]): EmbeddingResults; } +export interface VLMPipeline { + new (): VLMPipeline; + init( + modelPath: string, + device: string, + ovProperties: VLMPipelineProperties, + callback: (err: Error | null) => void, + ): void; + generate( + prompt: string, + images: Tensor[] | undefined, + videos: Tensor[] | undefined, + streamer: ((chunk: string) => StreamingStatus) | undefined, + generationConfig: GenerationConfig | undefined, + callback: ( + err: Error | null, + result: { texts: string[]; scores: number[]; perfMetrics: VLMPerfMetrics }, + ) => void, + ): void; + startChat(systemMessage: string, callback: (err: Error | null) => void): void; + finishChat(callback: (err: Error | null) => void): void; + getTokenizer(): ITokenizer; + setChatTemplate(template: string): void; + setGenerationConfig(config: GenerationConfig): void; +} + interface OpenVINOGenAIAddon { TextEmbeddingPipeline: TextEmbeddingPipelineWrapper; LLMPipeline: any; + VLMPipeline: VLMPipeline; ChatHistory: IChatHistory; Tokenizer: ITokenizer; setOpenvinoAddon: (ovAddon: any) => void; @@ -84,6 +114,6 @@ function getGenAIAddon(): OpenVINOGenAIAddon { const addon = getGenAIAddon(); addon.setOpenvinoAddon(ovAddon); -export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory, Tokenizer } = addon; +export const { TextEmbeddingPipeline, LLMPipeline, VLMPipeline, ChatHistory, Tokenizer } = addon; export type ChatHistory = IChatHistory; export type Tokenizer = ITokenizer; diff --git a/src/js/lib/decodedResults.ts b/src/js/lib/decodedResults.ts new file mode 100644 index 0000000000..f74db73508 --- /dev/null +++ b/src/js/lib/decodedResults.ts @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { PerfMetrics, VLMPerfMetrics } from 
"./perfMetrics.js"; + +/** + * Structure to store resulting batched text outputs and scores for each batch. + * @note The first num_return_sequences elements correspond to the first batch element. + */ +export class DecodedResults { + /** + * @param {string[]} texts - Vector of resulting sequences. + * @param {number[]} scores - Scores for each sequence. + * @param {PerfMetrics} perfMetrics - Performance metrics (tpot, ttft, etc.). + */ + constructor(texts: string[], scores: number[], perfMetrics: PerfMetrics) { + this.texts = texts; + this.scores = scores; + this.perfMetrics = perfMetrics; + } + toString() { + if (this.scores.length !== this.texts.length) { + throw new Error("The number of scores and texts doesn't match in DecodedResults."); + } + if (this.texts.length === 0) { + return ""; + } + if (this.texts.length === 1) { + return this.texts[0]; + } + const lines = this.scores.map((score, i) => `${score.toFixed(6)}: ${this.texts[i]}`); + return lines.join("\n"); + } + texts: string[]; + scores: number[]; + perfMetrics: PerfMetrics; +} + +/** + * Structure to store VLM resulting batched text outputs and scores for each batch. + * @note The first num_return_sequences elements correspond to the first batch element. + */ +export class VLMDecodedResults extends DecodedResults { + /** + * @param {string[]} texts - Vector of resulting sequences. + * @param {number[]} scores - Scores for each sequence. + * @param {VLMPerfMetrics} perfMetrics - VLM-specific performance metrics. + */ + constructor(texts: string[], scores: number[], perfMetrics: VLMPerfMetrics) { + super(texts, scores, perfMetrics); + this.perfMetrics = perfMetrics; + } + + /** VLM specific performance metrics. */ + perfMetrics: VLMPerfMetrics; +} diff --git a/src/js/lib/index.ts b/src/js/lib/index.ts index ad8e49168f..dd36cf2227 100644 --- a/src/js/lib/index.ts +++ b/src/js/lib/index.ts @@ -2,8 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 import { LLMPipeline as LLM } from "./pipelines/llmPipeline.js"; +import { VLMPipeline as VLM } from "./pipelines/vlmPipeline.js"; import { TextEmbeddingPipeline as Embedding } from "./pipelines/textEmbeddingPipeline.js"; -import { LLMPipelineProperties } from "./utils.js"; +import { LLMPipelineProperties, VLMPipelineProperties } from "./utils.js"; class PipelineFactory { static async LLMPipeline(modelPath: string, device?: string): Promise; @@ -28,6 +29,18 @@ class PipelineFactory { await pipeline.init(); return pipeline; } + + static async VLMPipeline( + modelPath: string, + device: string = "CPU", + properties: VLMPipelineProperties = {}, + ) { + const pipeline = new VLM(modelPath, device, properties); + await pipeline.init(); + + return pipeline; + } + static async TextEmbeddingPipeline(modelPath: string, device = "CPU", config = {}) { const pipeline = new Embedding(modelPath, device, config); await pipeline.init(); @@ -36,8 +49,9 @@ class PipelineFactory { } } -export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory; -export { DecodedResults } from "./pipelines/llmPipeline.js"; +export const { LLMPipeline, VLMPipeline, TextEmbeddingPipeline } = PipelineFactory; +export { DecodedResults, VLMDecodedResults } from "./decodedResults.js"; +export { PerfMetrics, VLMPerfMetrics } from "./perfMetrics.js"; export * from "./utils.js"; export * from "./addon.js"; export type { TokenizedInputs, EncodeOptions, DecodeOptions } from "./tokenizer.js"; diff --git a/src/js/lib/perfMetrics.ts b/src/js/lib/perfMetrics.ts new file mode 100644 index 0000000000..d312505024 --- /dev/null +++ 
b/src/js/lib/perfMetrics.ts @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +/** Structure holding mean and standard deviation values. */ +export type MeanStdPair = { + mean: number; + std: number; +}; + +/** Structure holding summary of statistical values */ +export type SummaryStats = MeanStdPair & { + min: number; + max: number; +}; + +/** Structure with raw performance metrics for each generation before any statistics are calculated. */ +export type RawMetrics = { + /** Durations for each generate call in milliseconds. */ + generateDurations: number[]; + /** Durations for the tokenization process in milliseconds. */ + tokenizationDurations: number[]; + /** Durations for the detokenization process in milliseconds. */ + detokenizationDurations: number[]; + /** Times to the first token for each call in milliseconds. */ + timesToFirstToken: number[]; + /** Timestamps of generation every token or batch of tokens in milliseconds. */ + newTokenTimes: number[]; + /** Inference time for each token in milliseconds. */ + tokenInferDurations: number[]; + /** Batch sizes for each generate call. */ + batchSizes: number[]; + /** Total durations for each generate call in milliseconds. */ + durations: number[]; + /** Total inference duration for each generate call in microseconds. */ + inferenceDurations: number[]; + /** Time to compile the grammar in milliseconds. */ + grammarCompileTimes: number[]; +}; + +/** Structure with raw performance metrics for VLM generation. */ +export type VLMRawMetrics = { + /** Durations for embedding preparation in milliseconds. */ + prepareEmbeddingsDurations: number[]; +}; + +/** + * Holds performance metrics for each generate call. + * + * PerfMetrics holds the following metrics with mean and standard deviations: + - Time To the First Token (TTFT), ms + - Time per Output Token (TPOT), ms/token + - Inference time per Output Token (IPOT), ms/token + - Generate total duration, ms + - Inference duration, ms + - Tokenization duration, ms + - Detokenization duration, ms + - Throughput, tokens/s + - Load time, ms + - Number of generated tokens + - Number of tokens in the input prompt + - Time to initialize grammar compiler for each backend, ms + - Time to compile grammar, ms + * Preferable way to access metrics is via getter methods. Getter methods calculate mean and std values from rawMetrics and return pairs. + * If mean and std were already calculated, getter methods return cached values. + */ +export interface PerfMetrics { + /** Returns the load time in milliseconds. */ + getLoadTime(): number; + /** Returns the number of generated tokens. */ + getNumGeneratedTokens(): number; + /** Returns the number of tokens in the input prompt. */ + getNumInputTokens(): number; + /** Returns the mean and standard deviation of Time To the First Token (TTFT) in milliseconds. */ + getTTFT(): MeanStdPair; + /** Returns the mean and standard deviation of Time Per Output Token (TPOT) in milliseconds. */ + getTPOT(): MeanStdPair; + /** Returns the mean and standard deviation of Inference time Per Output Token in milliseconds. */ + getIPOT(): MeanStdPair; + /** Returns the mean and standard deviation of throughput in tokens per second. */ + getThroughput(): MeanStdPair; + /** Returns the mean and standard deviation of the time spent on model inference during generate call in milliseconds. */ + getInferenceDuration(): MeanStdPair; + /** Returns the mean and standard deviation of generate durations in milliseconds. 
*/ + getGenerateDuration(): MeanStdPair; + /** Returns the mean and standard deviation of tokenization durations in milliseconds. */ + getTokenizationDuration(): MeanStdPair; + /** Returns the mean and standard deviation of detokenization durations in milliseconds. */ + getDetokenizationDuration(): MeanStdPair; + /** Returns a map with the time to initialize the grammar compiler for each backend in milliseconds. */ + getGrammarCompilerInitTimes(): { [key: string]: number }; + /** Returns the mean, standard deviation, min, and max of grammar compile times in milliseconds. */ + getGrammarCompileTime(): SummaryStats; + /** A structure of RawPerfMetrics type that holds raw metrics. */ + rawMetrics: RawMetrics; + + /** Adds the metrics from another PerfMetrics object to this one. + * @returns The current PerfMetrics instance. + */ + add(other: PerfMetrics): this; +} + +/** + * Holds performance metrics for each VLM generate call. + * + * VLMPerfMetrics extends PerfMetrics with VLM-specific metrics: + * - Prepare embeddings duration, ms + */ +export interface VLMPerfMetrics extends PerfMetrics { + /** Returns the mean and standard deviation of embeddings preparation duration in milliseconds. */ + getPrepareEmbeddingsDuration(): MeanStdPair; + /** VLM specific raw metrics */ + vlmRawMetrics: VLMRawMetrics; + + /** Adds the metrics from another VLMPerfMetrics object to this one. + * @returns The current VLMPerfMetrics instance. + */ + add(other: VLMPerfMetrics): this; +} diff --git a/src/js/lib/pipelines/llmPipeline.ts b/src/js/lib/pipelines/llmPipeline.ts index f05e654e5e..ce2a418436 100644 --- a/src/js/lib/pipelines/llmPipeline.ts +++ b/src/js/lib/pipelines/llmPipeline.ts @@ -1,6 +1,7 @@ import util from "node:util"; import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js"; import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js"; +import { DecodedResults } from "../decodedResults.js"; import { Tokenizer } from "../tokenizer.js"; export type ResolveFunction = (arg: { value: string; done: boolean }) => void; @@ -9,131 +10,6 @@ export type Options = { max_new_tokens?: number; }; -/** Structure with raw performance metrics for each generation before any statistics are calculated. */ -export type RawMetrics = { - /** Durations for each generate call in milliseconds. */ - generateDurations: number[]; - /** Durations for the tokenization process in milliseconds. */ - tokenizationDurations: number[]; - /** Durations for the detokenization process in milliseconds. */ - detokenizationDurations: number[]; - /** Times to the first token for each call in milliseconds. */ - timesToFirstToken: number[]; - /** Timestamps of generation every token or batch of tokens in milliseconds. */ - newTokenTimes: number[]; - /** Inference time for each token in milliseconds. */ - tokenInferDurations: number[]; - /** Batch sizes for each generate call. */ - batchSizes: number[]; - /** Total durations for each generate call in milliseconds. */ - durations: number[]; - /** Total inference duration for each generate call in microseconds. */ - inferenceDurations: number[]; - /** Time to compile the grammar in milliseconds. */ - grammarCompileTimes: number[]; -}; - -/** Structure holding mean and standard deviation values. 
*/ -export type MeanStdPair = { - mean: number; - std: number; -}; - -/** Structure holding summary of statistical values */ -export type SummaryStats = { - mean: number; - std: number; - min: number; - max: number; -}; - -/** - * Holds performance metrics for each generate call. - * - * PerfMetrics holds the following metrics with mean and standard deviations: - - Time To the First Token (TTFT), ms - - Time per Output Token (TPOT), ms/token - - Inference time per Output Token (IPOT), ms/token - - Generate total duration, ms - - Inference duration, ms - - Tokenization duration, ms - - Detokenization duration, ms - - Throughput, tokens/s - - Load time, ms - - Number of generated tokens - - Number of tokens in the input prompt - - Time to initialize grammar compiler for each backend, ms - - Time to compile grammar, ms - * Preferable way to access metrics is via getter methods. Getter methods calculate mean and std values from rawMetrics and return pairs. - * If mean and std were already calculated, getter methods return cached values. - */ -export interface PerfMetrics { - /** Returns the load time in milliseconds. */ - getLoadTime(): number; - /** Returns the number of generated tokens. */ - getNumGeneratedTokens(): number; - /** Returns the number of tokens in the input prompt. */ - getNumInputTokens(): number; - /** Returns the mean and standard deviation of Time To the First Token (TTFT) in milliseconds. */ - getTTFT(): MeanStdPair; - /** Returns the mean and standard deviation of Time Per Output Token (TPOT) in milliseconds. */ - getTPOT(): MeanStdPair; - /** Returns the mean and standard deviation of Inference time Per Output Token in milliseconds. */ - getIPOT(): MeanStdPair; - /** Returns the mean and standard deviation of throughput in tokens per second. */ - getThroughput(): MeanStdPair; - /** Returns the mean and standard deviation of the time spent on model inference during generate call in milliseconds. */ - getInferenceDuration(): MeanStdPair; - /** Returns the mean and standard deviation of generate durations in milliseconds. */ - getGenerateDuration(): MeanStdPair; - /** Returns the mean and standard deviation of tokenization durations in milliseconds. */ - getTokenizationDuration(): MeanStdPair; - /** Returns the mean and standard deviation of detokenization durations in milliseconds. */ - getDetokenizationDuration(): MeanStdPair; - /** Returns a map with the time to initialize the grammar compiler for each backend in milliseconds. */ - getGrammarCompilerInitTimes(): { [key: string]: number }; - /** Returns the mean, standard deviation, min, and max of grammar compile times in milliseconds. */ - getGrammarCompileTime(): SummaryStats; - /** A structure of RawPerfMetrics type that holds raw metrics. */ - rawMetrics: RawMetrics; - - /** Adds the metrics from another PerfMetrics object to this one. - * @returns The current PerfMetrics instance. 
- */ - add(other: PerfMetrics): this; -} - -export class DecodedResults { - constructor(texts: string[], scores: number[], perfMetrics: PerfMetrics) { - this.texts = texts; - this.scores = scores; - this.perfMetrics = perfMetrics; - } - toString() { - if (this.scores.length !== this.texts.length) { - throw new Error("The number of scores and texts doesn't match in DecodedResults."); - } - if (this.texts.length === 0) { - return ""; - } - if (this.texts.length === 1) { - return this.texts[0]; - } - let result = ""; - for (let i = 0; i < this.texts.length - 1; ++i) { - result += `${this.scores[i].toFixed(6)}: ${this.texts[i]}\n`; - } - result += `${this.scores[this.scores.length - 1].toFixed( - 6, - )}: ${this.texts[this.texts.length - 1]}`; - - return result; - } - texts: string[]; - scores: number[]; - perfMetrics: PerfMetrics; -} - export class LLMPipeline { modelPath: string; device: string; diff --git a/src/js/lib/pipelines/vlmPipeline.ts b/src/js/lib/pipelines/vlmPipeline.ts new file mode 100644 index 0000000000..cdede8ee25 --- /dev/null +++ b/src/js/lib/pipelines/vlmPipeline.ts @@ -0,0 +1,225 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import util from "node:util"; +import { VLMPipeline as VLMPipelineWrapper } from "../addon.js"; +import { GenerationConfig, VLMPipelineProperties, StreamingStatus } from "../utils.js"; +import { VLMDecodedResults } from "../decodedResults.js"; +import { Tokenizer } from "../tokenizer.js"; +import type { Tensor } from "openvino-node"; +import { VLMPerfMetrics } from "../perfMetrics.js"; + +/** + * Options for VLM generation methods. + */ +export type VLMGenerateOptions = { + /** Array of image tensors to include in the prompt. */ + images?: Tensor[]; + /** Array of video frame tensors to include in the prompt. */ + videos?: Tensor[]; + /** Generation configuration parameters such as max_length, temperature, etc. */ + generationConfig?: GenerationConfig; +}; + +/** + * This class is used for generation with Visual Language Models (VLMs) + */ +export class VLMPipeline { + protected readonly modelPath: string; + protected readonly device: string; + protected pipeline: VLMPipelineWrapper | null = null; + protected readonly properties: VLMPipelineProperties; + + /** + * Construct a VLM pipeline from a folder containing tokenizer and model IRs. + * @param modelPath - A folder to read tokenizer and model IRs. + * @param device - Inference device. A tokenizer is always compiled for CPU. + * @param properties - Device and pipeline properties. + */ + constructor(modelPath: string, device: string, properties: VLMPipelineProperties) { + this.modelPath = modelPath; + this.device = device; + this.properties = properties; + } + + /** + * Initialize the underlying native pipeline. + * @returns Resolves when initialization is complete. + */ + async init() { + const pipeline = new VLMPipelineWrapper(); + + const initPromise = util.promisify(pipeline.init.bind(pipeline)); + await initPromise(this.modelPath, this.device, this.properties); + + this.pipeline = pipeline; + } + /** + * Start a chat session with an optional system message. + * @param systemMessage - Optional system message to initialize chat context. + * @returns Resolves when chat session is started. 
+ */ + async startChat(systemMessage: string = "") { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + + const startChatPromise = util.promisify(this.pipeline.startChat.bind(this.pipeline)); + const result = await startChatPromise(systemMessage); + + return result; + } + /** + * Finish the current chat session and clear chat-related state. + * @returns Resolves when chat session is finished. + */ + async finishChat() { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + + const finishChatPromise = util.promisify(this.pipeline.finishChat.bind(this.pipeline)); + const result = await finishChatPromise(); + + return result; + } + /** + * Stream generation results as an async iterator of strings. + * The iterator yields subword chunks. + * @param prompt - Input prompt. May contain image/video tags recognized by the model. + * @param options - Optional parameters. + * @param options.images - Array of image tensors to include in the prompt. + * @param options.videos - Array of video frame tensors to include in the prompt. + * @param options.generationConfig - Generation parameters. + * @returns Async iterator producing subword chunks. + */ + stream(prompt: string, options: VLMGenerateOptions = {}): AsyncIterableIterator { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + const { images, videos, generationConfig } = options; + + let streamingStatus: StreamingStatus = StreamingStatus.RUNNING; + const queue: { done: boolean; subword: string }[] = []; + type ResolveFunction = (arg: { value: string; done: boolean }) => void; + type RejectFunction = (reason?: unknown) => void; + let resolvePromise: ResolveFunction | null; + let rejectPromise: RejectFunction | null; + + const callback = ( + error: Error | null, + result: { texts: string[]; scores: number[]; perfMetrics: VLMPerfMetrics }, + ) => { + if (error) { + if (rejectPromise) { + rejectPromise(error); + // Reset promises + resolvePromise = null; + rejectPromise = null; + } else { + throw error; + } + } else { + const decodedResult = new VLMDecodedResults( + result.texts, + result.scores, + result.perfMetrics, + ); + const fullText = decodedResult.toString(); + if (resolvePromise) { + // Fulfill pending request + resolvePromise({ done: true, value: fullText }); + // Reset promises + resolvePromise = null; + rejectPromise = null; + } else { + // Add data to queue if no pending promise + queue.push({ done: true, subword: fullText }); + } + } + }; + + const streamer = (chunk: string): StreamingStatus => { + if (resolvePromise) { + // Fulfill pending request + resolvePromise({ done: false, value: chunk }); + // Reset promises + resolvePromise = null; + rejectPromise = null; + } else { + // Add data to queue if no pending promise + queue.push({ done: false, subword: chunk }); + } + return streamingStatus; + }; + + this.pipeline.generate(prompt, images, videos, streamer, generationConfig, callback); + + return { + async next() { + // If there is data in the queue, return it + // Otherwise, return a promise that will resolve when data is available + const data = queue.shift(); + + if (data) { + return { value: data.subword, done: data.done }; + } + + return new Promise((resolve: ResolveFunction, reject: (reason?: unknown) => void) => { + resolvePromise = resolve; + rejectPromise = reject; + }); + }, + async return() { + streamingStatus = StreamingStatus.CANCEL; + + return { done: true, value: "" }; + }, + [Symbol.asyncIterator]() { + return this; + }, + }; + } + /** + * Generate sequences for 
VLMs. + * @param prompt - Input prompt. May contain model-specific image/video tags. + * @param options - Optional parameters. + * @param options.images - Images to include in the prompt. + * @param options.videos - Videos to include in the prompt. + * @param options.generationConfig - Generation configuration parameters. + * @param options.streamer - Optional streamer callback called for each chunk. + * @returns Resolves with decoded results once generation finishes. + */ + async generate( + prompt: string, + options: VLMGenerateOptions & { streamer?: (chunk: string) => StreamingStatus } = {}, + ): Promise { + const { images, videos, generationConfig, streamer } = options; + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + const innerGenerate = util.promisify(this.pipeline.generate.bind(this.pipeline)); + const result = await innerGenerate(prompt, images, videos, streamer, generationConfig); + + return new VLMDecodedResults(result.texts, result.scores, result.perfMetrics); + } + + /** + * Get the pipeline tokenizer instance. + * @returns Tokenizer used by the pipeline. + */ + getTokenizer(): Tokenizer { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + return this.pipeline.getTokenizer(); + } + + /** + * Set the chat template used when formatting chat history and prompts. + * @param chatTemplate - Chat template string. + */ + setChatTemplate(chatTemplate: string): void { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + this.pipeline.setChatTemplate(chatTemplate); + } + + /** + * Set generation configuration parameters. + * @param config - Generation configuration parameters. + */ + setGenerationConfig(config: GenerationConfig): void { + if (!this.pipeline) throw new Error("Pipeline is not initialized"); + this.pipeline.setGenerationConfig(config); + } +} diff --git a/src/js/lib/utils.ts b/src/js/lib/utils.ts index 43684e9fa0..17211549e2 100644 --- a/src/js/lib/utils.ts +++ b/src/js/lib/utils.ts @@ -344,3 +344,7 @@ export type SchedulerConfig = { export type LLMPipelineProperties = { schedulerConfig?: SchedulerConfig; }; + +export type VLMPipelineProperties = { + schedulerConfig?: SchedulerConfig; +} & Record; diff --git a/src/js/src/addon.cpp b/src/js/src/addon.cpp index 72cb3b6b16..80c8d800b1 100644 --- a/src/js/src/addon.cpp +++ b/src/js/src/addon.cpp @@ -5,6 +5,8 @@ #include "include/perf_metrics.hpp" #include "include/llm_pipeline/llm_pipeline_wrapper.hpp" +#include "include/vlm_pipeline/vlm_pipeline_wrapper.hpp" +#include "include/vlm_pipeline/perf_metrics.hpp" #include "include/text_embedding_pipeline/pipeline_wrapper.hpp" #include "include/tokenizer.hpp" #include "include/chat_history.hpp" @@ -47,9 +49,11 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) { env.SetInstanceData(addon_data); init_class(env, exports, "LLMPipeline", &LLMPipelineWrapper::get_class, addon_data->core); + init_class(env, exports, "VLMPipeline", &VLMPipelineWrapper::get_class, addon_data->vlm_pipeline); init_class(env, exports, "TextEmbeddingPipeline", &TextEmbeddingPipelineWrapper::get_class, addon_data->core); init_class(env, exports, "Tokenizer", &TokenizerWrapper::get_class, addon_data->tokenizer); init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics); + init_class(env, exports, "VLMPerfMetrics", &VLMPerfMetricsWrapper::get_class, addon_data->vlm_perf_metrics); init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history); // Expose a helper to set 
the openvino-node addon from JS (useful for ESM) diff --git a/src/js/src/helper.cpp b/src/js/src/helper.cpp index 5414a7f522..523f09568d 100644 --- a/src/js/src/helper.cpp +++ b/src/js/src/helper.cpp @@ -3,6 +3,7 @@ #include "include/addon.hpp" #include "include/chat_history.hpp" #include "include/perf_metrics.hpp" +#include "include/vlm_pipeline/perf_metrics.hpp" namespace { constexpr const char* JS_SCHEDULER_CONFIG_KEY = "schedulerConfig"; @@ -337,6 +338,25 @@ ov::Tensor js_to_cpp(const Napi::Env& env, const Napi::Value& value) return *tensor_ptr; } +template <> +std::vector js_to_cpp>(const Napi::Env& env, const Napi::Value& value) { + std::vector tensors; + if (value.IsUndefined() || value.IsNull()) { + return tensors; + } + if (value.IsArray()) { + auto array = value.As(); + size_t length = array.Length(); + tensors.reserve(length); + for (uint32_t i = 0; i < length; ++i) { + tensors.push_back(js_to_cpp(env, array[i])); + } + } else { + OPENVINO_THROW("Passed argument must be an array of Tensors."); + } + return tensors; +} + template <> ov::genai::PerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value) { const auto obj = value.As(); @@ -350,6 +370,17 @@ ov::genai::PerfMetrics& unwrap(const Napi::Env& env, con return js_metrics->get_value(); } +template <> +ov::genai::VLMPerfMetrics& unwrap(const Napi::Env& env, const Napi::Value& value) { + const auto obj = value.As(); + const auto& prototype = env.GetInstanceData()->vlm_perf_metrics; + OPENVINO_ASSERT(prototype, "Invalid pointer to prototype."); + OPENVINO_ASSERT(obj.InstanceOf(prototype.Value().As()), + "Passed argument is not of type VLMPerfMetrics"); + const auto js_metrics = Napi::ObjectWrap::Unwrap(obj); + return js_metrics->get_value(); +} + template <> ov::genai::ChatHistory& unwrap(const Napi::Env& env, const Napi::Value& value) { OPENVINO_ASSERT(value.IsObject(), "Passed argument must be an object."); @@ -539,3 +570,20 @@ Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::stri return ctor_val.As(); } + +Napi::Object to_decoded_result(const Napi::Env& env, const ov::genai::DecodedResults& results) { + Napi::Object obj = Napi::Object::New(env); + obj.Set("texts", cpp_to_js, Napi::Value>(env, results.texts)); + obj.Set("scores", cpp_to_js, Napi::Value>(env, results.scores)); + obj.Set("perfMetrics", PerfMetricsWrapper::wrap(env, results.perf_metrics)); + obj.Set("subword", Napi::String::New(env, results)); + return obj; +} + +Napi::Object to_vlm_decoded_result(const Napi::Env& env, const ov::genai::VLMDecodedResults& results) { + Napi::Object obj = Napi::Object::New(env); + obj.Set("texts", cpp_to_js, Napi::Value>(env, results.texts)); + obj.Set("scores", cpp_to_js, Napi::Value>(env, results.scores)); + obj.Set("perfMetrics", VLMPerfMetricsWrapper::wrap(env, results.perf_metrics)); + return obj; +} diff --git a/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp b/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp index 6a78ad24f8..19327b1e60 100644 --- a/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp +++ b/src/js/src/llm_pipeline/llm_pipeline_wrapper.cpp @@ -22,15 +22,6 @@ struct TsfnContext { std::shared_ptr options = nullptr; }; -Napi::Object create_decoded_results_object(Napi::Env env, const ov::genai::DecodedResults& result) { - Napi::Object obj = Napi::Object::New(env); - obj.Set("texts", cpp_to_js, Napi::Value>(env, result.texts)); - obj.Set("scores", cpp_to_js, Napi::Value>(env, result.scores)); - obj.Set("perfMetrics", PerfMetricsWrapper::wrap(env, result.perf_metrics)); - 
obj.Set("subword", Napi::String::New(env, result)); - return obj; -} - void performInferenceThread(TsfnContext* context) { try { ov::genai::GenerationConfig config; @@ -89,7 +80,7 @@ void performInferenceThread(TsfnContext* context) { }, context->inputs); napi_status status = context->tsfn.BlockingCall([result](Napi::Env env, Napi::Function jsCallback) { - jsCallback.Call({Napi::Boolean::New(env, true), create_decoded_results_object(env, result)}); + jsCallback.Call({Napi::Boolean::New(env, true), to_decoded_result(env, result)}); }); if (status != napi_ok) { diff --git a/src/js/src/perf_metrics.cpp b/src/js/src/perf_metrics.cpp index 1dfdbb62bb..cfce4c24df 100644 --- a/src/js/src/perf_metrics.cpp +++ b/src/js/src/perf_metrics.cpp @@ -1,37 +1,16 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + #include "include/perf_metrics.hpp" -#include "bindings_utils.hpp" #include "include/addon.hpp" -#include "include/helper.hpp" - -using ov::genai::common_bindings::utils::get_ms; -using ov::genai::common_bindings::utils::timestamp_to_ms; PerfMetricsWrapper::PerfMetricsWrapper(const Napi::CallbackInfo& info) - : Napi::ObjectWrap(info), - _metrics{} {}; + : BasePerfMetricsWrapper(info) {} Napi::Function PerfMetricsWrapper::get_class(Napi::Env env) { - return DefineClass( - env, - "PerfMetrics", - { - InstanceMethod("getLoadTime", &PerfMetricsWrapper::get_load_time), - InstanceMethod("getNumGeneratedTokens", &PerfMetricsWrapper::get_num_generated_tokens), - InstanceMethod("getNumInputTokens", &PerfMetricsWrapper::get_num_input_tokens), - InstanceMethod("getTTFT", &PerfMetricsWrapper::get_ttft), - InstanceMethod("getTPOT", &PerfMetricsWrapper::get_tpot), - InstanceMethod("getIPOT", &PerfMetricsWrapper::get_ipot), - InstanceMethod("getThroughput", &PerfMetricsWrapper::get_throughput), - InstanceMethod("getInferenceDuration", &PerfMetricsWrapper::get_inference_duration), - InstanceMethod("getGenerateDuration", &PerfMetricsWrapper::get_generate_duration), - InstanceMethod("getTokenizationDuration", &PerfMetricsWrapper::get_tokenization_duration), - InstanceMethod("getDetokenizationDuration", &PerfMetricsWrapper::get_detokenization_duration), - InstanceMethod("getGrammarCompilerInitTimes", &PerfMetricsWrapper::get_grammar_compiler_init_times), - InstanceMethod("getGrammarCompileTime", &PerfMetricsWrapper::get_grammar_compile_time), - InstanceAccessor<&PerfMetricsWrapper::get_raw_metrics>("rawMetrics"), - InstanceMethod("add", &PerfMetricsWrapper::add), - }); + auto properties = BasePerfMetricsWrapper::get_class_properties(); + return DefineClass(env, "PerfMetrics", properties); } Napi::Object PerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::PerfMetrics& metrics) { @@ -43,143 +22,6 @@ Napi::Object PerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::PerfMetric return obj; } -Napi::Value PerfMetricsWrapper::get_load_time(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getLoadTime()"); - return Napi::Number::New(info.Env(), _metrics.get_load_time()); -} - -Napi::Value PerfMetricsWrapper::get_num_generated_tokens(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getNumGeneratedTokens()"); - return Napi::Number::New(info.Env(), _metrics.get_num_generated_tokens()); -} - -Napi::Value PerfMetricsWrapper::get_num_input_tokens(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getNumInputTokens()"); - return Napi::Number::New(info.Env(), _metrics.get_num_input_tokens()); -} - -Napi::Object 
create_mean_std_pair(Napi::Env env, const ov::genai::MeanStdPair& pair) { - Napi::Object obj = Napi::Object::New(env); - obj.Set("mean", Napi::Number::New(env, pair.mean)); - obj.Set("std", Napi::Number::New(env, pair.std)); - return obj; -} - -Napi::Object create_summary_stats(Napi::Env env, const ov::genai::SummaryStats& stats) { - Napi::Object obj = Napi::Object::New(env); - obj.Set("mean", Napi::Number::New(env, stats.mean)); - obj.Set("std", Napi::Number::New(env, stats.std)); - obj.Set("min", Napi::Number::New(env, stats.min)); - obj.Set("max", Napi::Number::New(env, stats.max)); - return obj; -} - -Napi::Value PerfMetricsWrapper::get_ttft(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getTTFT()"); - return create_mean_std_pair(info.Env(), _metrics.get_ttft()); -} - -Napi::Value PerfMetricsWrapper::get_tpot(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getTPOT()"); - return create_mean_std_pair(info.Env(), _metrics.get_tpot()); -} - -Napi::Value PerfMetricsWrapper::get_ipot(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getIPOT()"); - return create_mean_std_pair(info.Env(), _metrics.get_ipot()); -} - -Napi::Value PerfMetricsWrapper::get_throughput(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getThroughput()"); - return create_mean_std_pair(info.Env(), _metrics.get_throughput()); -} - -Napi::Value PerfMetricsWrapper::get_inference_duration(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getInferenceDuration()"); - return create_mean_std_pair(info.Env(), _metrics.get_inference_duration()); -} - -Napi::Value PerfMetricsWrapper::get_generate_duration(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getGenerateDuration()"); - return create_mean_std_pair(info.Env(), _metrics.get_generate_duration()); -} - -Napi::Value PerfMetricsWrapper::get_tokenization_duration(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getTokenizationDuration()"); - return create_mean_std_pair(info.Env(), _metrics.get_tokenization_duration()); -} - -Napi::Value PerfMetricsWrapper::get_grammar_compiler_init_times(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompilerInitTimes()"); - return cpp_map_to_js_object(info.Env(), _metrics.get_grammar_compiler_init_times()); -} - -Napi::Value PerfMetricsWrapper::get_grammar_compile_time(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getGrammarCompileTime()"); - return create_summary_stats(info.Env(), _metrics.get_grammar_compile_time()); -}; - -Napi::Value PerfMetricsWrapper::get_detokenization_duration(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 0, "getDetokenizationDuration()"); - return create_mean_std_pair(info.Env(), _metrics.get_detokenization_duration()); -} - Napi::Value PerfMetricsWrapper::get_raw_metrics(const Napi::CallbackInfo& info) { - Napi::Object obj = Napi::Object::New(info.Env()); - obj.Set("generateDurations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::generate_durations))); - obj.Set("tokenizationDurations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::tokenization_durations))); - obj.Set("detokenizationDurations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::detokenization_durations))); - - obj.Set("timesToFirstToken", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, 
&ov::genai::RawPerfMetrics::m_times_to_first_token))); - obj.Set("newTokenTimes", - cpp_to_js, Napi::Value>( - info.Env(), - timestamp_to_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_new_token_times))); - obj.Set("tokenInferDurations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_token_infer_durations))); - obj.Set("batchSizes", cpp_to_js, Napi::Value>(info.Env(), _metrics.raw_metrics.m_batch_sizes)); - obj.Set("durations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_durations))); - obj.Set("inferenceDurations", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_inference_durations))); - - obj.Set("grammarCompileTimes", - cpp_to_js, Napi::Value>( - info.Env(), - get_ms(_metrics.raw_metrics, &ov::genai::RawPerfMetrics::m_grammar_compile_times))); - - return obj; -} - -Napi::Value PerfMetricsWrapper::add(const Napi::CallbackInfo& info) { - VALIDATE_ARGS_COUNT(info, 1, "add()"); - const auto env = info.Env(); - try { - _metrics += unwrap(env, info[0]); - } catch (const std::exception& ex) { - Napi::TypeError::New(env, ex.what()).ThrowAsJavaScriptException(); - } - return info.This(); -} - -ov::genai::PerfMetrics& PerfMetricsWrapper::get_value() { - return _metrics; + return BasePerfMetricsWrapper::get_raw_metrics(info); } diff --git a/src/js/src/vlm_pipeline/finish_chat_worker.cpp b/src/js/src/vlm_pipeline/finish_chat_worker.cpp new file mode 100644 index 0000000000..764be64719 --- /dev/null +++ b/src/js/src/vlm_pipeline/finish_chat_worker.cpp @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "include/vlm_pipeline/finish_chat_worker.hpp" + +VLMFinishChatWorker::VLMFinishChatWorker(Function& callback, std::shared_ptr& pipe) + : AsyncWorker(callback), + pipe(pipe) {}; + +void VLMFinishChatWorker::Execute() { + this->pipe->finish_chat(); +}; + +void VLMFinishChatWorker::OnOK() { + Callback().Call({Env().Null()}); +}; diff --git a/src/js/src/vlm_pipeline/init_worker.cpp b/src/js/src/vlm_pipeline/init_worker.cpp new file mode 100644 index 0000000000..49e93608da --- /dev/null +++ b/src/js/src/vlm_pipeline/init_worker.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "include/vlm_pipeline/init_worker.hpp" + +VLMInitWorker::VLMInitWorker(Function& callback, + std::shared_ptr& pipe, + std::shared_ptr is_initializing, + const std::string model_path, + const std::string device, + const ov::AnyMap properties) + : AsyncWorker(callback), + pipe(pipe), + is_initializing(is_initializing), + model_path(model_path), + device(device), + properties(properties) {}; + +void VLMInitWorker::Execute() { + *this->is_initializing = true; + this->pipe = std::make_shared(this->model_path, this->device, this->properties); +}; + +void VLMInitWorker::OnOK() { + *this->is_initializing = false; + Callback().Call({Env().Null()}); +}; + +void VLMInitWorker::OnError(const Error& e) { + *this->is_initializing = false; + Callback().Call({Napi::Error::New(Env(), e.Message()).Value()}); +}; diff --git a/src/js/src/vlm_pipeline/perf_metrics.cpp b/src/js/src/vlm_pipeline/perf_metrics.cpp new file mode 100644 index 0000000000..6e2a258df8 --- /dev/null +++ b/src/js/src/vlm_pipeline/perf_metrics.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include 
"include/vlm_pipeline/perf_metrics.hpp" + +#include "include/addon.hpp" +#include "include/helper.hpp" + +using ov::genai::common_bindings::utils::get_ms; + +VLMPerfMetricsWrapper::VLMPerfMetricsWrapper(const Napi::CallbackInfo& info) + : BasePerfMetricsWrapper(info) {} + +Napi::Function VLMPerfMetricsWrapper::get_class(Napi::Env env) { + auto properties = BasePerfMetricsWrapper::get_class_properties(); + properties.push_back( + InstanceMethod("getPrepareEmbeddingsDuration", &VLMPerfMetricsWrapper::get_prepare_embeddings_duration)); + properties.push_back(InstanceAccessor<&VLMPerfMetricsWrapper::get_vlm_raw_metrics>("vlmRawMetrics")); + return DefineClass(env, "VLMPerfMetrics", properties); +} + +Napi::Object VLMPerfMetricsWrapper::wrap(Napi::Env env, const ov::genai::VLMPerfMetrics& metrics) { + const auto& prototype = env.GetInstanceData()->vlm_perf_metrics; + OPENVINO_ASSERT(prototype, "Invalid pointer to prototype."); + auto obj = prototype.New({}); + const auto m_ptr = Napi::ObjectWrap::Unwrap(obj); + m_ptr->_metrics = metrics; + return obj; +} + +Napi::Value VLMPerfMetricsWrapper::get_prepare_embeddings_duration(const Napi::CallbackInfo& info) { + VALIDATE_ARGS_COUNT(info, 0, "getPrepareEmbeddingsDuration()"); + return perf_utils::create_mean_std_pair(info.Env(), _metrics.get_prepare_embeddings_duration()); +} + +Napi::Value VLMPerfMetricsWrapper::get_raw_metrics(const Napi::CallbackInfo& info) { + return BasePerfMetricsWrapper::get_raw_metrics(info); +} + +Napi::Value VLMPerfMetricsWrapper::get_vlm_raw_metrics(const Napi::CallbackInfo& info) { + Napi::Object obj = Napi::Object::New(info.Env()); + obj.Set("prepareEmbeddingsDurations", + cpp_to_js, Napi::Value>( + info.Env(), + get_ms(_metrics.vlm_raw_metrics, &ov::genai::VLMRawPerfMetrics::prepare_embeddings_durations))); + + return obj; +} diff --git a/src/js/src/vlm_pipeline/start_chat_worker.cpp b/src/js/src/vlm_pipeline/start_chat_worker.cpp new file mode 100644 index 0000000000..bbce8cf210 --- /dev/null +++ b/src/js/src/vlm_pipeline/start_chat_worker.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "include/vlm_pipeline/start_chat_worker.hpp" + +VLMStartChatWorker::VLMStartChatWorker(Function& callback, + std::shared_ptr& pipe, + std::string system_message) + : AsyncWorker(callback), + pipe(pipe), + system_message(system_message) {}; + +void VLMStartChatWorker::Execute() { + this->pipe->start_chat(this->system_message); +}; + +void VLMStartChatWorker::OnOK() { + Callback().Call({Env().Null()}); +}; diff --git a/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp b/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp new file mode 100644 index 0000000000..cc1c14fbbc --- /dev/null +++ b/src/js/src/vlm_pipeline/vlm_pipeline_wrapper.cpp @@ -0,0 +1,287 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "include/vlm_pipeline/vlm_pipeline_wrapper.hpp" + +#include + +#include "include/addon.hpp" +#include "include/helper.hpp" +#include "include/tokenizer.hpp" +#include "include/vlm_pipeline/finish_chat_worker.hpp" +#include "include/vlm_pipeline/init_worker.hpp" +#include "include/vlm_pipeline/perf_metrics.hpp" +#include "include/vlm_pipeline/start_chat_worker.hpp" + +struct VLMTsfnContext { + VLMTsfnContext(std::string prompt, std::shared_ptr is_generating) + : prompt(prompt), + is_generating(is_generating) {}; + ~VLMTsfnContext() {}; + + std::thread native_thread; + Napi::ThreadSafeFunction callback; + std::optional 
streamer; + + std::string prompt; + std::vector images; + std::vector videos; + std::shared_ptr is_generating; + std::shared_ptr pipe = nullptr; + std::shared_ptr generation_config = nullptr; +}; + +void vlmPerformInferenceThread(VLMTsfnContext* context) { + auto report_error = [context](const std::string& message) { + auto status = context->callback.BlockingCall([message](Napi::Env env, Napi::Function jsCallback) { + try { + jsCallback.Call( + {Napi::Error::New(env, "vlmPerformInferenceThread error. " + message).Value(), env.Null()}); + } catch (std::exception& err) { + std::cerr << "The callback failed when attempting to return an error from vlmPerformInferenceThread. " + "Details:\n" + << err.what() << std::endl; + std::cerr << "Original error message:\n" << message << std::endl; + } + }); + if (status != napi_ok) { + std::cerr << "The BlockingCall failed with status " << status + << " when trying to return an error from vlmPerformInferenceThread." << std::endl; + std::cerr << "Original error message:\n" << message << std::endl; + } + }; + auto finalize = [context]() { + *context->is_generating = false; + context->callback.Release(); + if (context->streamer.has_value()) { + context->streamer->Release(); + } + }; + try { + ov::genai::GenerationConfig config; + config.update_generation_config(*context->generation_config); + + ov::genai::StreamerVariant streamer = std::monostate(); + std::vector streamer_exceptions; + if (context->streamer.has_value()) { + streamer = [context, &streamer_exceptions](std::string word) { + std::promise resultPromise; + napi_status status = context->streamer->BlockingCall( + [word, &resultPromise, &streamer_exceptions](Napi::Env env, Napi::Function jsCallback) { + try { + auto callback_result = jsCallback.Call({Napi::String::New(env, word)}); + if (callback_result.IsNumber()) { + resultPromise.set_value(static_cast( + callback_result.As().Int32Value())); + } else { + resultPromise.set_value(ov::genai::StreamingStatus::RUNNING); + } + } catch (std::exception& err) { + streamer_exceptions.push_back(err.what()); + resultPromise.set_value(ov::genai::StreamingStatus::CANCEL); + } + }); + + if (status != napi_ok) { + streamer_exceptions.push_back("The streamer callback BlockingCall failed with the status: " + + status); + return ov::genai::StreamingStatus::CANCEL; + } + + return resultPromise.get_future().get(); + }; + } + + ov::genai::VLMDecodedResults result; + + result = context->pipe->generate(context->prompt, context->images, context->videos, config, streamer); + + if (!streamer_exceptions.empty()) { + // If there were exceptions from the streamer, report them all as a single error and finish without result + std::string combined_error = "Streamer exceptions occurred:\n"; + for (size_t i = 0; i < streamer_exceptions.size(); ++i) { + combined_error += "[" + std::to_string(i + 1) + "] " + streamer_exceptions[i] + "\n"; + } + report_error(combined_error); + } else { + // If no exceptions from streamer, call the final callback with the result + napi_status status = + context->callback.BlockingCall([result, &report_error](Napi::Env env, Napi::Function jsCallback) { + try { + jsCallback.Call({ + env.Null(), // Error should be null in normal case + to_vlm_decoded_result(env, result) // Return DecodedResults as the final result + }); + } catch (std::exception& err) { + report_error("The final callback failed. 
Details:\n" + std::string(err.what())); + } + }); + + if (status != napi_ok) { + report_error("The final BlockingCall failed with status " + status); + } + } + finalize(); + } catch (std::exception& e) { + report_error(e.what()); + finalize(); + } +} + +VLMPipelineWrapper::VLMPipelineWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap(info) {}; + +Napi::Function VLMPipelineWrapper::get_class(Napi::Env env) { + return DefineClass(env, + "VLMPipeline", + {InstanceMethod("init", &VLMPipelineWrapper::init), + InstanceMethod("generate", &VLMPipelineWrapper::generate), + InstanceMethod("getTokenizer", &VLMPipelineWrapper::get_tokenizer), + InstanceMethod("startChat", &VLMPipelineWrapper::start_chat), + InstanceMethod("finishChat", &VLMPipelineWrapper::finish_chat), + InstanceMethod("setChatTemplate", &VLMPipelineWrapper::set_chat_template), + InstanceMethod("setGenerationConfig", &VLMPipelineWrapper::set_generation_config)}); +} + +Napi::Value VLMPipelineWrapper::init(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(!this->pipe, "Pipeline is already initialized"); + OPENVINO_ASSERT(!*this->is_initializing, "Pipeline is already initializing"); + VALIDATE_ARGS_COUNT(info, 4, "init()"); + const std::string model_path = js_to_cpp(env, info[0]); + const std::string device = js_to_cpp(env, info[1]); + const auto& properties = js_to_cpp(env, info[2]); + OPENVINO_ASSERT(info[3].IsFunction(), "init callback is not a function"); + Napi::Function callback = info[3].As(); + + VLMInitWorker* asyncWorker = + new VLMInitWorker(callback, this->pipe, this->is_initializing, model_path, device, properties); + asyncWorker->Queue(); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::generate(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + OPENVINO_ASSERT(!*this->is_generating, "Another generation is already in progress"); + *this->is_generating = true; + VALIDATE_ARGS_COUNT(info, 6, "generate()"); + VLMTsfnContext* context = nullptr; + + // Arguments: prompt, images, videos, streamer, generationConfig, callback + auto prompt = js_to_cpp(env, info[0]); + auto images = js_to_cpp>(env, info[1]); + auto videos = js_to_cpp>(env, info[2]); + OPENVINO_ASSERT(info[3].IsFunction() || info[3].IsUndefined(), "generate callback is not a function"); + auto streamer = info[3].As(); + auto generation_config = js_to_cpp(env, info[4]); + OPENVINO_ASSERT(info[5].IsFunction(), "generate callback is not a function"); + auto callback = info[5].As(); + + context = new VLMTsfnContext(prompt, this->is_generating); + context->images = std::move(images); + context->videos = std::move(videos); + context->pipe = this->pipe; + context->generation_config = std::make_shared(generation_config); + + context->callback = + Napi::ThreadSafeFunction::New(env, + callback, // JavaScript function called asynchronously + "VLM_generate_callback", // Name + 0, // Unlimited queue + 1, // Only one thread will use this initially + [context, this](Napi::Env) { // Finalizer used to clean threads up + context->native_thread.join(); + delete context; + }); + if (!streamer.IsUndefined()) { + context->streamer = Napi::ThreadSafeFunction::New(env, + streamer, // JavaScript function called asynchronously + "VLM_generate_streamer", // Name + 0, // Unlimited queue + 1); // Only one thread will use this initially + } + 
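        // Note: the finalizer passed to the callback ThreadSafeFunction above joins
        // `native_thread` and deletes the context once the worker thread releases the TSFN,
        // so no additional cleanup is required after spawning the thread below.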
context->native_thread = std::thread(vlmPerformInferenceThread, context); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + *this->is_generating = false; + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::start_chat(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + VALIDATE_ARGS_COUNT(info, 2, "startChat()"); + auto system_message = js_to_cpp(env, info[0]); + OPENVINO_ASSERT(info[1].IsFunction(), "startChat callback is not a function"); + auto callback = info[1].As(); + + VLMStartChatWorker* asyncWorker = new VLMStartChatWorker(callback, this->pipe, system_message); + asyncWorker->Queue(); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::finish_chat(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + VALIDATE_ARGS_COUNT(info, 1, "finishChat()"); + OPENVINO_ASSERT(info[0].IsFunction(), "finishChat callback is not a function"); + Napi::Function callback = info[0].As(); + + VLMFinishChatWorker* asyncWorker = new VLMFinishChatWorker(callback, this->pipe); + asyncWorker->Queue(); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::get_tokenizer(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + auto tokenizer = this->pipe->get_tokenizer(); + return TokenizerWrapper::wrap(env, tokenizer); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::set_chat_template(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + VALIDATE_ARGS_COUNT(info, 1, "setChatTemplate()"); + auto chat_template = js_to_cpp(env, info[0]); + this->pipe->set_chat_template(chat_template); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} + +Napi::Value VLMPipelineWrapper::set_generation_config(const Napi::CallbackInfo& info) { + auto env = info.Env(); + try { + OPENVINO_ASSERT(this->pipe, "VLMPipeline is not initialized"); + VALIDATE_ARGS_COUNT(info, 1, "setGenerationConfig()"); + auto config_map = js_to_cpp(env, info[0]); + ov::genai::GenerationConfig config; + config.update_generation_config(config_map); + this->pipe->set_generation_config(config); + } catch (const std::exception& ex) { + Napi::Error::New(env, ex.what()).ThrowAsJavaScriptException(); + } + return env.Undefined(); +} diff --git a/src/js/tests/models.js b/src/js/tests/models.js index f3daf59986..5fef7cda40 100644 --- a/src/js/tests/models.js +++ b/src/js/tests/models.js @@ -2,4 +2,5 @@ export const models = { LLM: "OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov", InstructLLM: "OpenVINO/Qwen2.5-1.5B-Instruct-int4-ov", Embedding: "OpenVINO/bge-base-en-v1.5-fp16-ov", + VLM: "OpenVINO/Qwen2-VL-7B-Instruct-int4-ov", }; diff --git a/src/js/tests/utils.js b/src/js/tests/utils.js index be886b4bcb..0416941d99 100644 --- a/src/js/tests/utils.js +++ b/src/js/tests/utils.js @@ -1,6 +1,7 @@ import { bootstrap } from "global-agent"; 
import { promises as fs } from "node:fs"; import { listFiles, downloadFile } from "@huggingface/hub"; +import { addon as ov } from "openvino-node"; const BASE_DIR = "./tests/models/"; @@ -45,3 +46,60 @@ async function saveFile(file, response) { await fs.writeFile(file, Buffer.from(arrayBuffer)); } + +/** + * Creates a synthetic test image tensor with a gradient pattern. + * + * Generates a small RGB image filled with a gradient pattern for testing VLM pipelines. + * The red channel varies by height, green by width, and blue is constant. + * + * @param height - Height of the image in pixels. (default: 32) + * @param width - Width of the image in pixels. (default: 32) + * @returns An OpenVINO Tensor with shape [height, width, channels] and uint8 data type. + */ +export function createTestImageTensor(height = 32, width = 32) { + const channels = 3; + const data = new Uint8Array(height * width * channels); + + // Fill with gradient pattern + for (let h = 0; h < height; h++) { + for (let w = 0; w < width; w++) { + const idx = (h * width + w) * channels; + data[idx] = h * 8; // R + data[idx + 1] = w * 8; // G + data[idx + 2] = 128; // B + } + } + + return new ov.Tensor("u8", [height, width, channels], data); +} + +/** + * Creates a synthetic test video tensor with multiple frames. + * + * Generates a video tensor with a synthetic pattern that varies across frames. + * Each frame has a slightly different color pattern to simulate temporal variation. + * Useful for testing VLM pipelines with video inputs. + * + * @param frames - Number of video frames to generate. (default: 4) + * @param height - Height of each frame in pixels. (default: 32) + * @param width - Width of each frame in pixels. (default: 32) + * @returns An OpenVINO Tensor with shape [frames, height, width, channels] and uint8 data type. 
+ */ +export function createTestVideoTensor(frames = 4, height = 32, width = 32) { + const channels = 3; + const data = new Uint8Array(frames * height * width * channels); + + for (let f = 0; f < frames; f++) { + for (let h = 0; h < height; h++) { + for (let w = 0; w < width; w++) { + const idx = (f * height * width + h * width + w) * channels; + data[idx] = (h + f * 10) % 256; + data[idx + 1] = (w + f * 10) % 256; + data[idx + 2] = 128; + } + } + } + + return new ov.Tensor("u8", [frames, height, width, channels], data); +} diff --git a/src/js/tests/vlmPipeline.test.js b/src/js/tests/vlmPipeline.test.js new file mode 100644 index 0000000000..8611b772e8 --- /dev/null +++ b/src/js/tests/vlmPipeline.test.js @@ -0,0 +1,153 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { Tokenizer, VLMPipeline, DecodedResults, VLMDecodedResults } from "../dist/index.js"; + +import assert from "node:assert"; +import { describe, it, before } from "node:test"; +import { models } from "./models.js"; +import { createTestImageTensor, createTestVideoTensor } from "./utils.js"; + +const MODEL_PATH = process.env.VLM_MODEL_PATH || `./tests/models/${models.VLM.split("/")[1]}`; + +// Skip tests on macOS due to insufficient memory +describe("VLMPipeline", { skip: process.platform === "darwin" }, () => { + let pipeline, testImage1, testImage2, testVideo1, testVideo2; + + before(async () => { + pipeline = await VLMPipeline(MODEL_PATH, "CPU"); + pipeline.setGenerationConfig({ max_new_tokens: 10 }); + testImage1 = createTestImageTensor(); + testImage2 = createTestImageTensor(50, 50); + testVideo1 = createTestVideoTensor(); + testVideo2 = createTestVideoTensor(6, 64, 64); + }); + + it("should generate text without images", async () => { + const result = await pipeline.generate("What is 2+2?"); + + assert.ok(result instanceof DecodedResults, "Result should be instance of DecodedResults"); + assert.ok( + result instanceof VLMDecodedResults, + "Result should be instance of VLMDecodedResults", + ); + assert.ok(result.texts.length > 0, "Should generate some output"); + }); + + it("should generate text with images", async () => { + const result = await pipeline.generate("Compare these two images.", { + images: [testImage1, testImage2], + }); + + assert.strictEqual(result.texts.length, 1, "Should generate comparison"); + }); + + it("should generate text with video input", async () => { + const result = await pipeline.generate("Describe what happens in this video.", { + videos: [testVideo1], + generationConfig: { + max_new_tokens: 20, + temperature: 0, + }, + }); + + assert.strictEqual(result.texts.length, 1); + }); + + it("should generate with both image and video", async () => { + const result = await pipeline.generate("Compare the image and video.", { + images: [testImage1], + videos: [testVideo2], + generationConfig: { max_new_tokens: 20, temperature: 0 }, + }); + + assert.strictEqual(result.texts.length, 1); + }); + + it("throw error on invalid streamer", async () => { + await assert.rejects( + pipeline.generate("What is 2+2?", { + streamer: () => { + throw new Error("Test error"); + }, + }), + /Test error/, + ); + }); + + it("throw error with invalid generationConfig", async () => { + await assert.rejects( + pipeline.generate("What is 2+2?", { + generationConfig: { max_new_tokens: "five" }, + }), + /vlmPerformInferenceThread error/, + ); + }); + + it("should support streaming generation", async () => { + const chunks = []; + + const stream = pipeline.stream("What do you see?", { 
+ images: [testImage1], + generationConfig: { + max_new_tokens: 15, + temperature: 0, + }, + }); + + for await (const chunk of stream) { + chunks.push(chunk); + } + + assert.ok(chunks.length > 0, "Should receive streaming chunks"); + const fullOutput = chunks.join(""); + assert.ok(fullOutput.length > 0, "Combined chunks should form output"); + }); + + it("should return VLMDecodedResults with perfMetrics", async () => { + const result = await pipeline.generate("Describe the image.", { + images: [testImage2], + generationConfig: { + max_new_tokens: 10, + temperature: 0, + }, + }); + + assert.ok(result, "Should return result"); + assert.ok(result.perfMetrics, "Should have perfMetrics"); + // Property from base PerformanceMetrics + const numTokens = result.perfMetrics.getNumGeneratedTokens(); + assert.ok(typeof numTokens === "number", "getNumGeneratedTokens should return number"); + assert.ok( + 0 < numTokens && numTokens <= 10, + "Number of tokens should be between 0 and max_new_tokens", + ); + // VLM-specific properties + const prepareEmbeddings = result.perfMetrics.getPrepareEmbeddingsDuration(); + assert.ok( + typeof prepareEmbeddings.mean === "number", + "PrepareEmbeddingsDuration should have mean", + ); + const { prepareEmbeddingsDurations } = result.perfMetrics.vlmRawMetrics; + assert.ok( + Array.isArray(prepareEmbeddingsDurations), + "Should have duration of preparation of embeddings", + ); + assert.ok(prepareEmbeddingsDurations.length > 0, "Should have at least one duration value"); + }); + + it("should get tokenizer from pipeline", () => { + const tokenizer = pipeline.getTokenizer(); + assert.ok(tokenizer instanceof Tokenizer, "Should return tokenizer"); + }); + + it("should start and finish chat", async () => { + await pipeline.startChat("You are an assistant named Tom."); + const result1 = await pipeline.generate("What is your name?"); + assert.ok(/Tom/.test(result1.toString())); + + await pipeline.finishChat(); + const result2 = await pipeline.generate("What is your name?"); + assert.ok(!/Tom/.test(result2.toString())); + }); +});
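For reviewers, a minimal end-to-end usage sketch of the JavaScript API this patch adds, based only on the calls exercised by the tests above (pipeline construction, generate with image tensors, streaming, chat mode, and VLM performance metrics). The model path and the zero-filled image tensor are illustrative placeholders, not part of the change:

import { addon as ov } from "openvino-node";
import { VLMPipeline } from "openvino-genai-node";

// Placeholder image: a black 32x32 RGB tensor in [H, W, C] layout with u8 data.
// A real application would decode an image file instead (see the sharp-based docs example).
const height = 32, width = 32, channels = 3;
const image = new ov.Tensor("u8", [height, width, channels], new Uint8Array(height * width * channels));

// Illustrative model path; point it at a converted VLM such as the one used in the tests.
const pipe = await VLMPipeline("./models/Qwen2-VL-7B-Instruct-int4-ov", "CPU");

// One-shot generation with an image input.
const result = await pipe.generate("Describe the image.", {
  images: [image],
  generationConfig: { max_new_tokens: 50 },
});
console.log(result.texts[0]);
console.log(result.perfMetrics.getPrepareEmbeddingsDuration()); // { mean, std }

// Chat mode with token streaming.
await pipe.startChat("You are a concise assistant.");
for await (const chunk of pipe.stream("What do you see?", { images: [image] })) {
  process.stdout.write(chunk);
}
await pipe.finishChat();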