Skip to content

Commit 3dbd738

Browse files
rdheekonda, Raja Sekhar Rao Dheekonda, romanlutz
authored
Fix XPIAOrchestrator Blob Not Found Exception (#694)
Co-authored-by: Raja Sekhar Rao Dheekonda <[email protected]> Co-authored-by: Roman Lutz <[email protected]>
1 parent 6d07f5b commit 3dbd738

File tree

7 files changed

+484
-738
lines changed

7 files changed

+484
-738
lines changed

doc/code/orchestrators/3_xpia_orchestrator.ipynb

Lines changed: 398 additions & 707 deletions
Large diffs are not rendered by default.

doc/code/orchestrators/3_xpia_orchestrator.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
# This is to simulate a processing target with a plugin similar to what one might expect in an XPIA-oriented AI red teaming operation.
5454

5555
# %%
56-
5756
from xpia_helpers import AzureStoragePlugin, SemanticKernelPluginAzureOpenAIPromptTarget
5857

5958
from pyrit.common import IN_MEMORY, initialize_pyrit
@@ -82,7 +81,6 @@
8281
#
8382
# Finally, we can put all the pieces together:
8483
# %%
85-
8684
from pyrit.orchestrator import XPIATestOrchestrator
8785
from pyrit.prompt_target import AzureBlobStorageTarget
8886
from pyrit.score import SubStringScorer
@@ -107,7 +105,6 @@
107105
# Clean up storage container
108106

109107
# %%
110-
111108
import os
112109

113110
from xpia_helpers import AzureStoragePlugin
@@ -120,3 +117,5 @@
120117

121118
memory = CentralMemory.get_memory_instance()
122119
memory.dispose_engine()
120+
121+
# %%

doc/code/orchestrators/xpia_helpers.py

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
from typing import Any, Optional
3+
from urllib.parse import urlparse
34

45
from azure.storage.blob.aio import ContainerClient as AsyncContainerClient
56
from openai import AsyncAzureOpenAI
@@ -84,19 +85,19 @@ def __init__(
8485

8586
self._kernel = Kernel()
8687

87-
service_id = "chat"
88+
self._service_id = "chat"
8889

8990
self._kernel.add_service(
9091
AzureChatCompletion(
91-
service_id=service_id, deployment_name=self._deployment_name, async_client=self._async_client
92+
service_id=self._service_id, deployment_name=self._deployment_name, async_client=self._async_client
9293
),
9394
)
9495

9596
self._plugin_name = plugin_name
96-
self._kernel.import_plugin_from_object(plugin, plugin_name)
97+
self._kernel.add_plugin(plugin, plugin_name)
9798

9899
self._execution_settings = AzureChatPromptExecutionSettings(
99-
service_id=service_id,
100+
service_id=self._service_id,
100101
ai_model_id=self._deployment_name,
101102
max_tokens=max_tokens,
102103
temperature=temperature,
@@ -136,16 +137,36 @@ async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> P
136137
template=request.converted_value,
137138
name=self._plugin_name,
138139
template_format="semantic-kernel",
139-
execution_settings=self._execution_settings,
140+
execution_settings={self._service_id: self._execution_settings},
140141
)
141-
processing_function = self._kernel.create_function_from_prompt(
142+
processing_function = self._kernel.add_function(
142143
function_name="processingFunc", plugin_name=self._plugin_name, prompt_template_config=prompt_template_config
143144
)
144-
processing_output = await self._kernel.invoke(processing_function)
145-
processing_output = str(processing_output)
145+
processing_output = await self._kernel.invoke(processing_function) # type: ignore
146+
if processing_output is None:
147+
raise ValueError("Processing function returned None unexpectedly.")
148+
try:
149+
inner_content = processing_output.get_inner_content()
150+
151+
if (
152+
not hasattr(inner_content, "choices")
153+
or not isinstance(inner_content.choices, list)
154+
or not inner_content.choices
155+
):
156+
raise ValueError("Invalid response: 'choices' is missing or empty.")
157+
158+
first_choice = inner_content.choices[0]
159+
160+
if not hasattr(first_choice, "message") or not hasattr(first_choice.message, "content"):
161+
raise ValueError("Invalid response: 'message' or 'content' is missing in choices[0].")
162+
163+
processing_output = first_choice.message.content
164+
165+
except AttributeError as e:
166+
raise ValueError(f"Unexpected structure in processing_output: {e}")
146167
logger.info(f'Received the following response from the prompt target "{processing_output}"')
147168

148-
response = construct_response_from_request(request=request, response_text_pieces=[processing_output])
169+
response = construct_response_from_request(request=request, response_text_pieces=[str(processing_output)])
149170
return response
150171

151172
def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
@@ -182,16 +203,17 @@ async def _create_container_client_async(self) -> None:
182203
"""Creates an asynchronous ContainerClient for Azure Storage. If a SAS token is provided via the
183204
AZURE_STORAGE_ACCOUNT_SAS_TOKEN environment variable or the init sas_token parameter, it will be used
184205
for authentication. Otherwise, a delegation SAS token will be created using Entra ID authentication."""
206+
container_url, _ = self._parse_url()
185207
try:
186208
sas_token: str = default_values.get_required_value(
187209
env_var_name=self.SAS_TOKEN_ENVIRONMENT_VARIABLE, passed_value=self._sas_token
188210
)
189211
logger.info("Using SAS token from environment variable or passed parameter.")
190212
except ValueError:
191213
logger.info("SAS token not provided. Creating a delegation SAS token using Entra ID authentication.")
192-
sas_token = await AzureStorageAuth.get_sas_token(self._container_url)
214+
sas_token = await AzureStorageAuth.get_sas_token(container_url)
193215
self._storage_client = AsyncContainerClient.from_container_url(
194-
container_url=self._container_url,
216+
container_url=container_url,
195217
credential=sas_token,
196218
)
197219

@@ -204,8 +226,10 @@ async def download_async(self) -> str:
204226
await self._create_container_client_async()
205227

206228
all_blobs = ""
229+
# Parse the Azure Storage Blob URL to extract components
230+
_, blob_prefix = self._parse_url()
207231
async with self._storage_client as client:
208-
async for blob in client.list_blobs():
232+
async for blob in client.list_blobs(name_starts_with=blob_prefix):
209233
logger.info(f"Downloading Azure storage blob {blob.name}")
210234
blob_client = client.get_blob_client(blob=blob.name)
211235
blob_data = await blob_client.download_blob()
@@ -223,11 +247,21 @@ async def delete_blobs_async(self):
223247
await self._create_container_client_async()
224248
logger.info("Deleting all blobs in the container.")
225249
try:
250+
_, blob_prefix = self._parse_url()
226251
async with self._storage_client as client:
227-
async for blob in client.list_blobs():
252+
async for blob in client.list_blobs(name_starts_with=blob_prefix):
228253
print("blob name is given as", blob.name)
229254
await client.get_blob_client(blob=blob.name).delete_blob()
230255
logger.info(f"Deleted blob: {blob.name}")
231256
except Exception as ex:
232257
logger.exception(msg=f"An error occurred while deleting blobs: {ex}")
233258
raise
259+
260+
def _parse_url(self) -> tuple[str, str]:
    """Split the configured container URL into a container URL and a blob prefix.

    ``self._container_url`` may address a container directly or a virtual folder
    inside it (``<scheme>://<host>/<container>/<folder>/...``). The container part
    is what the container client is built from; the remainder is what the listing
    calls pass as ``name_starts_with``.

    Returns:
        tuple[str, str]: ``(container_url, blob_prefix)``. ``container_url`` is
        ``<scheme>://<host>/<container>``; ``blob_prefix`` is everything after the
        container name in the path, or ``""`` when the URL stops at the container.

    Raises:
        ValueError: If the URL has no host or no container name in its path.
    """
    parsed_url = urlparse(self._container_url)
    # Strip the leading "/" so the first path segment is the container name and
    # the rest (possibly empty) is the prefix, kept verbatim.
    container_name, _, blob_prefix = parsed_url.path.lstrip("/").partition("/")
    if not parsed_url.netloc or not container_name:
        # Fail with an actionable message instead of an opaque IndexError or a
        # silently malformed "https://host/" URL. The offending URL is not echoed
        # because it may carry credentials in its query string.
        raise ValueError(
            "Azure Storage container URL must have the form "
            "https://<account>.blob.core.windows.net/<container>[/<prefix>]."
        )
    # Preserve the caller's scheme rather than forcing https; fall back to https
    # only when the URL was given without one.
    scheme = parsed_url.scheme or "https"
    container_url = f"{scheme}://{parsed_url.netloc}/{container_name}"
    return container_url, blob_prefix

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ dev = [
9393
"pytest-asyncio>=0.23.5",
9494
"pytest-cov>=4.0.0",
9595
"respx>=0.22.0",
96-
"semantic-kernel==0.9.4b1",
96+
"semantic-kernel>=1.20.0",
9797
"types-PyYAML>=6.0.12.9",
9898
]
9999
torch = [
@@ -131,7 +131,7 @@ all = [
131131
"pytest-asyncio>=0.23.5",
132132
"pytest-cov>=4.0.0",
133133
"respx>=0.20.2",
134-
"semantic-kernel==0.9.4b1",
134+
"semantic-kernel>=1.20.0",
135135
"sentencepiece==0.2.0",
136136
"torch>=2.3.0",
137137
"playwright==1.49.0",

pyrit/models/data_type_serializer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ def _get_storage_io(self):
113113
ValueError: If the Azure Storage URL is detected but the datasets storage handle is not set.
114114
"""
115115
if self._is_azure_storage_url(self.value):
116+
# Scenarios where a user utilizes an in-memory DuckDB but also needs to interact
117+
# with an Azure Storage Account, e.g., XPIAOrchestrator.
118+
from pyrit.common import AZURE_SQL, initialize_pyrit
119+
120+
initialize_pyrit(memory_db_type=AZURE_SQL)
116121
return self._memory.results_storage_io
117122
return DiskStorageIO()
118123

pyrit/prompt_target/azure_blob_storage_target.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
from enum import Enum
66
from typing import Optional
7+
from urllib.parse import urlparse
78

89
from azure.core.exceptions import ClientAuthenticationError
910
from azure.storage.blob import ContentSettings
@@ -69,17 +70,17 @@ async def _create_container_client_async(self) -> None:
6970
"""Creates an asynchronous ContainerClient for Azure Storage. If a SAS token is provided via the
7071
AZURE_STORAGE_ACCOUNT_SAS_TOKEN environment variable or the init sas_token parameter, it will be used
7172
for authentication. Otherwise, a delegation SAS token will be created using Entra ID authentication."""
73+
container_url, _ = self._parse_url()
7274
try:
7375
sas_token: str = default_values.get_required_value(
7476
env_var_name=self.SAS_TOKEN_ENVIRONMENT_VARIABLE, passed_value=self._sas_token
7577
)
7678
logger.info("Using SAS token from environment variable or passed parameter.")
7779
except ValueError:
7880
logger.info("SAS token not provided. Creating a delegation SAS token using Entra ID authentication.")
79-
sas_token = await AzureStorageAuth.get_sas_token(self._container_url)
80-
81+
sas_token = await AzureStorageAuth.get_sas_token(container_url)
8182
self._client_async = AsyncContainerClient.from_container_url(
82-
container_url=self._container_url,
83+
container_url=container_url,
8384
credential=sas_token,
8485
)
8586

@@ -98,14 +99,12 @@ async def _upload_blob_async(self, file_name: str, data: bytes, content_type: st
9899

99100
if not self._client_async:
100101
await self._create_container_client_async()
101-
102+
# Parse the Azure Storage Blob URL to extract components
103+
_, blob_prefix = self._parse_url()
104+
blob_path = f"{blob_prefix}/{file_name}"
102105
try:
103-
await self._client_async.upload_blob(
104-
name=file_name,
105-
data=data,
106-
content_settings=content_settings,
107-
overwrite=True,
108-
)
106+
blob_client = self._client_async.get_blob_client(blob=blob_path)
107+
await blob_client.upload_blob(data=data, content_settings=content_settings)
109108
except Exception as exc:
110109
if isinstance(exc, ClientAuthenticationError):
111110
logger.exception(
@@ -119,6 +118,15 @@ async def _upload_blob_async(self, file_name: str, data: bytes, content_type: st
119118
logger.exception(msg=f"An unexpected error occurred: {exc}")
120119
raise
121120

121+
def _parse_url(self) -> tuple[str, str]:
    """Split the configured container URL into a container URL and a blob prefix.

    ``self._container_url`` may address a container directly or a virtual folder
    inside it (``<scheme>://<host>/<container>/<folder>/...``). The container part
    is what the container client is built from; the remainder is the prefix that
    uploaded blob names are placed under.

    Returns:
        tuple[str, str]: ``(container_url, blob_prefix)``. ``container_url`` is
        ``<scheme>://<host>/<container>``; ``blob_prefix`` is everything after the
        container name in the path, or ``""`` when the URL stops at the container.

    Raises:
        ValueError: If the URL has no host or no container name in its path.
    """
    parsed_url = urlparse(self._container_url)
    # Strip the leading "/" so the first path segment is the container name and
    # the rest (possibly empty) is the prefix, kept verbatim.
    container_name, _, blob_prefix = parsed_url.path.lstrip("/").partition("/")
    if not parsed_url.netloc or not container_name:
        # Fail with an actionable message instead of an opaque IndexError or a
        # silently malformed "https://host/" URL. The offending URL is not echoed
        # because it may carry credentials in its query string.
        raise ValueError(
            "Azure Storage container URL must have the form "
            "https://<account>.blob.core.windows.net/<container>[/<prefix>]."
        )
    # Preserve the caller's scheme rather than forcing https; fall back to https
    # only when the URL was given without one.
    scheme = parsed_url.scheme or "https"
    container_url = f"{scheme}://{parsed_url.netloc}/{container_name}"
    return container_url, blob_prefix
129+
122130
@limit_requests_per_minute
123131
async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> PromptRequestResponse:
124132
"""

tests/unit/target/test_prompt_target_azure_blob_storage.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
# Licensed under the MIT license.
33

44
import os
5-
from unittest.mock import AsyncMock, patch
5+
from unittest.mock import AsyncMock, MagicMock, patch
66

77
import pytest
8+
from azure.storage.blob.aio import BlobClient as AsyncBlobClient
89
from azure.storage.blob.aio import ContainerClient as AsyncContainerClient
910
from unit.mocks import get_sample_conversations
1011

@@ -94,15 +95,22 @@ async def test_azure_blob_storage_validate_prev_convs(
9495

9596

9697
@pytest.mark.asyncio
97-
@patch.object(AsyncContainerClient, "upload_blob", new_callable=AsyncMock)
9898
@patch.object(AzureBlobStorageTarget, "_create_container_client_async", new_callable=AsyncMock)
99+
@patch.object(AsyncBlobClient, "upload_blob", new_callable=AsyncMock)
100+
@patch.object(AsyncContainerClient, "get_blob_client", new_callable=MagicMock)
99101
async def test_send_prompt_async(
100-
mock_create_client,
102+
mock_get_blob_client,
101103
mock_upload_blob,
104+
mock_create_client,
102105
azure_blob_storage_target: AzureBlobStorageTarget,
103106
sample_entries: list[PromptRequestPiece],
104107
):
108+
mock_blob_client = AsyncMock()
109+
mock_get_blob_client.return_value = mock_blob_client
110+
111+
mock_blob_client.upload_blob = mock_upload_blob
105112
mock_upload_blob.return_value = None
113+
106114
azure_blob_storage_target._client_async = AsyncContainerClient.from_container_url(
107115
container_url=azure_blob_storage_target._container_url, credential="mocked_sas_token"
108116
)
@@ -112,6 +120,7 @@ async def test_send_prompt_async(
112120
request = PromptRequestResponse([request_piece])
113121

114122
response = await azure_blob_storage_target.send_prompt_async(prompt_request=request)
123+
115124
assert response
116125
blob_url = response.request_pieces[0].converted_value
117126
assert azure_blob_storage_target._container_url in blob_url

0 commit comments

Comments
 (0)