diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py
index c26afe1dfa..f0ff6f00ea 100644
--- a/tests/python_tests/test_llm_pipeline.py
+++ b/tests/python_tests/test_llm_pipeline.py
@@ -184,6 +184,21 @@ def test_encoded_inputs(
     assert np.all(ov_res == hf_res)
 
 
+@pytest.mark.parametrize("llm_model", ["katuni4ka/tiny-random-phi3"], indirect=True)
+def test_readonly_input_tensor(ov_pipe: ov_genai.LLMPipeline) -> None:
+    input_ids = np.array([[1, 4, 42]], dtype=np.int64)
+    input_ids.flags.writeable = False
+
+    attention_mask = np.array([[1, 1, 1]], dtype=np.int64)
+    attention_mask.flags.writeable = False
+
+    inputs_ov = ov_genai.TokenizedInputs(ov.Tensor(input_ids), ov.Tensor(attention_mask))
+    ov_pipe.generate(inputs_ov, max_new_tokens=5)
+
+    readonly_tensor = ov.Tensor(input_ids)
+    ov_pipe.generate(readonly_tensor, max_new_tokens=5)
+
+
 @pytest.mark.parametrize("llm_model", MODELS_LIST, indirect=True)
 @pytest.mark.parametrize("generation_config_dict", TEST_CONFIGS)
 @pytest.mark.parametrize("prompts", BATCHED_PROMPTS)
@@ -878,4 +893,3 @@ def test_pipelines_generate_with_streaming(
         mock_streamer.assert_not_called()
     else:
         mock_streamer.assert_called()
-
diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py
index 7c7e58d495..481a316baa 100644
--- a/tests/python_tests/test_llm_pipeline_static.py
+++ b/tests/python_tests/test_llm_pipeline_static.py
@@ -1,9 +1,11 @@
 # Copyright (C) 2024-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from openvino_genai import GenerationConfig, Tokenizer, LLMPipeline, StreamerBase, ChatHistory
+from openvino_genai import GenerationConfig, Tokenizer, LLMPipeline, StreamerBase, ChatHistory, TokenizedInputs
 from pathlib import Path
+import openvino as ov
+import numpy as np
 
 import pytest
 import platform
 import sys
@@ -419,3 +421,19 @@ def generate_with_chat_history(pipe: LLMPipeline, questions: list[str]) -> ChatH
         f"NPU chat mode output:\n{answers_chat_mode_static}\n"
         f"NPU chat history output:\n{answers_chat_history_static}"
     )
+
+
+@pytest.mark.parametrize("llm_model", MODELS_LIST, indirect=True)
+@pytest.mark.parametrize("npu_config", PIPELINE_CONFIGS, indirect=True)
+def test_readonly_input_tensor(npu_model: LLMPipeline):
+    input_ids = np.array([[1, 4, 42]], dtype=np.int64)
+    input_ids.flags.writeable = False
+
+    attention_mask = np.array([[1, 1, 1]], dtype=np.int64)
+    attention_mask.flags.writeable = False
+
+    inputs_ov = TokenizedInputs(ov.Tensor(input_ids), ov.Tensor(attention_mask))
+    npu_model.generate(inputs_ov, max_new_tokens=5)
+
+    readonly_tensor = ov.Tensor(input_ids)
+    npu_model.generate(readonly_tensor, max_new_tokens=5)
diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py
index 8168ad86cf..bad9a1407d 100644
--- a/tests/python_tests/test_vlm_pipeline.py
+++ b/tests/python_tests/test_vlm_pipeline.py
@@ -450,6 +450,22 @@ def streamer(word: str) -> bool:
     assert res.texts[0] == "".join(result_from_streamer)
 
 
+@parametrize_one_model_sdpa
+def test_vlm_readonly_image_tensor(ov_pipe_model: VlmModelInfo, cat_image_32x32):
+    ov_pipe = ov_pipe_model.pipeline
+    generation_config = _setup_generation_config(ov_pipe, max_new_tokens=5)
+
+    image_array = np.array(cat_image_32x32, dtype=np.uint8)
+    image_array.flags.writeable = False
+
+    readonly_image_tensor = openvino.Tensor(image_array)
+    ov_pipe.generate(
+        PROMPTS[0],
+        images=[readonly_image_tensor],
+        generation_config=generation_config,
+    )
+
+
 @pytest.mark.parametrize(
     "config", [