diff --git a/src/uipath/platform/context_grounding/_context_grounding_service.py b/src/uipath/platform/context_grounding/_context_grounding_service.py index 417afa6f7..218352e29 100644 --- a/src/uipath/platform/context_grounding/_context_grounding_service.py +++ b/src/uipath/platform/context_grounding/_context_grounding_service.py @@ -72,6 +72,7 @@ def __init__( self._buckets_service = buckets_service super().__init__(config=config, execution_context=execution_context) + # 2.3.0 prefix trace name with contextgrounding @traced(name="add_to_index", run_type="uipath") @resource_override(resource_type="index") def add_to_index( @@ -127,6 +128,7 @@ def add_to_index( if ingest_data: self.ingest_data(index, folder_key=folder_key, folder_path=folder_path) + # 2.3.0 prefix trace name with contextgrounding @traced(name="add_to_index", run_type="uipath") @resource_override(resource_type="index") async def add_to_index_async( diff --git a/src/uipath/platform/documents/__init__.py b/src/uipath/platform/documents/__init__.py index f66a630a0..6bee9e0f4 100644 --- a/src/uipath/platform/documents/__init__.py +++ b/src/uipath/platform/documents/__init__.py @@ -18,6 +18,7 @@ FileContent, ProjectType, Reference, + StartExtractionResponse, ValidateClassificationAction, ValidateExtractionAction, ValidationAction, @@ -41,4 +42,5 @@ "ClassificationResult", "ClassificationResponse", "FileContent", + "StartExtractionResponse", ] diff --git a/src/uipath/platform/documents/_documents_service.py b/src/uipath/platform/documents/_documents_service.py index 3361ddca6..5fa391e4e 100644 --- a/src/uipath/platform/documents/_documents_service.py +++ b/src/uipath/platform/documents/_documents_service.py @@ -9,6 +9,7 @@ from ..._utils import Endpoint from ...tracing import traced from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext +from ..errors import ExtractionNotCompleteException from .documents import ( ActionPriority, ClassificationResponse, @@ -17,6 +18,7 @@ 
ExtractionResponseIXP, FileContent, ProjectType, + StartExtractionResponse, ValidateClassificationAction, ValidateExtractionAction, ) @@ -119,7 +121,9 @@ class DocumentsService(FolderContext, BaseService): """ def __init__( - self, config: UiPathApiConfig, execution_context: UiPathExecutionContext + self, + config: UiPathApiConfig, + execution_context: UiPathExecutionContext, ) -> None: super().__init__(config=config, execution_context=execution_context) @@ -433,7 +437,7 @@ def _start_extraction( tag: Optional[str], document_type_id: str, document_id: str, - ) -> str: + ) -> StartExtractionResponse: if project_type == ProjectType.PRETRAINED: url = Endpoint( f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start" @@ -443,7 +447,7 @@ def _start_extraction( f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start" ) - return self.request( + operation_id = self.request( "POST", url=url, params={"api-version": 1.1}, @@ -451,6 +455,13 @@ def _start_extraction( json={"documentId": document_id}, ).json()["operationId"] + return StartExtractionResponse( + operation_id=operation_id, + document_id=document_id, + project_id=project_id, + tag=tag, + ) + async def _start_extraction_async( self, project_id: str, @@ -458,7 +469,7 @@ async def _start_extraction_async( tag: Optional[str], document_type_id: str, document_id: str, - ) -> str: + ) -> StartExtractionResponse: if project_type == ProjectType.PRETRAINED: url = Endpoint( f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start" @@ -468,16 +479,23 @@ async def _start_extraction_async( f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start" ) - return ( - await self.request_async( - "POST", - url=url, - params={"api-version": 1.1}, - headers=self._get_common_headers(), - json={"documentId": document_id}, - ) + operation_id = ( + await self.request_async( + "POST", + url=url, +
params={"api-version": 1.1}, + headers=self._get_common_headers(), + json={"documentId": document_id}, + ) ).json()["operationId"] + return StartExtractionResponse( + operation_id=operation_id, + document_id=document_id, + project_id=project_id, + tag=tag, + ) + def _wait_for_operation( self, result_getter: Callable[[], Tuple[Any, Optional[Any], Optional[Any]]], @@ -887,6 +903,189 @@ async def classify_async( operation_id=operation_id, ) + @traced(name="documents_start_ixp_extraction", run_type="uipath") + def start_ixp_extraction( + self, + project_name: str, + tag: str, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, + ) -> StartExtractionResponse: + """Start an IXP extraction process without waiting for results (non-blocking). + + This method uploads the file as an attachment and starts the extraction process, + returning immediately without waiting for the extraction to complete. + Use this for async workflows where you want to receive results via callback/webhook. + + Args: + project_name (str): Name of the IXP project. + tag (str): Tag of the published project version (e.g., "staging"). + file (FileContent, optional): The document file to be processed. + file_path (str, optional): Path to the document file to be processed. + + Note: + Either `file` or `file_path` must be provided, but not both.
+ + Returns: + StartExtractionResponse: Contains the operation_id, document_id, project_id, and tag + + Examples: + ```python + start_response = uipath.documents.start_ixp_extraction( + project_name="MyIXPProjectName", + tag="staging", + file_path="path/to/document.pdf", + ) + # start_response.operation_id can be used to poll for results later + ``` + """ + _exactly_one_must_be_provided(file=file, file_path=file_path) + + project_id = self._get_project_id_by_name(project_name, ProjectType.IXP) + + document_id = self._start_digitization( + project_id=project_id, + file=file, + file_path=file_path, + ) + + return self._start_extraction( + project_id=project_id, + project_type=ProjectType.IXP, + tag=tag, + document_type_id=str(UUID(int=0)), + document_id=document_id, + ) + + @traced(name="documents_start_ixp_extraction_async", run_type="uipath") + async def start_ixp_extraction_async( + self, + project_name: str, + tag: str, + file: Optional[FileContent] = None, + file_path: Optional[str] = None, + ) -> StartExtractionResponse: + """Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method.""" + _exactly_one_must_be_provided(file=file, file_path=file_path) + + project_id = await self._get_project_id_by_name_async( + project_name, ProjectType.IXP + ) + + document_id = await self._start_digitization_async( + project_id=project_id, + file=file, + file_path=file_path, + ) + + return await self._start_extraction_async( + project_id=project_id, + project_type=ProjectType.IXP, + tag=tag, + document_type_id=str(UUID(int=0)), + document_id=document_id, + ) + + @traced(name="documents_retrieve_ixp_extraction_result", run_type="uipath") + def retrieve_ixp_extraction_result( + self, + project_id: str, + tag: str, + operation_id: str, + ) -> ExtractionResponseIXP: + """Retrieve the result of an IXP extraction operation (single-shot, non-blocking).
+ + This method retrieves the result of an IXP extraction that was previously started + with `start_ixp_extraction`. It does not poll - it makes a single request and + returns the result if available, or raises an exception if not complete. + + Args: + project_id (str): The ID of the IXP project. + tag (str): The tag of the published project version. + operation_id (str): The operation ID returned from `start_ixp_extraction`. + + Returns: + ExtractionResponseIXP: The extraction response containing the extracted data. + + Raises: + ExtractionNotCompleteException: If the extraction is not yet complete. + + Examples: + ```python + # After receiving a callback/webhook that extraction is complete: + result = service.retrieve_ixp_extraction_result( + project_id=start_response.project_id, + tag=start_response.tag, + operation_id=start_response.operation_id, + ) + ``` + """ + document_type_id = str(UUID(int=0)) + + url = Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}" + ) + + result = self.request( + method="GET", + url=url, + params={"api-version": "1.1"}, + headers=self._get_common_headers(), + ).json() + + status = result.get("status") + if status in ["NotStarted", "Running"]: + raise ExtractionNotCompleteException( + operation_id=operation_id, + status=status, + ) + + extraction_response = result.get("result") + extraction_response["projectId"] = project_id + extraction_response["tag"] = tag + extraction_response["documentTypeId"] = document_type_id + extraction_response["projectType"] = ProjectType.IXP + + return ExtractionResponseIXP.model_validate(extraction_response) + + @traced(name="documents_retrieve_ixp_extraction_result_async", run_type="uipath") + async def retrieve_ixp_extraction_result_async( + self, + project_id: str, + tag: str, + operation_id: str, + ) -> ExtractionResponseIXP: + """Asynchronous version of the
[`retrieve_ixp_extraction_result`][uipath.platform.documents._documents_service.DocumentsService.retrieve_ixp_extraction_result] method.""" + document_type_id = str(UUID(int=0)) + + url = Endpoint( + f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}" + ) + + result = ( + await self.request_async( + method="GET", + url=url, + params={"api-version": "1.1"}, + headers=self._get_common_headers(), + ) + ).json() + + status = result.get("status") + if status in ["NotStarted", "Running"]: + raise ExtractionNotCompleteException( + operation_id=operation_id, + status=status, + ) + + extraction_response = result.get("result") + extraction_response["projectId"] = project_id + extraction_response["tag"] = tag + extraction_response["documentTypeId"] = document_type_id + extraction_response["projectType"] = ProjectType.IXP + + return ExtractionResponseIXP.model_validate(extraction_response) + @traced(name="documents_extract", run_type="uipath") def extract( self, @@ -989,7 +1188,7 @@ def extract( tag=tag, document_type_id=document_type_id, document_id=document_id, - ) + ).operation_id return self._wait_for_extraction( project_id=project_id, @@ -1042,13 +1241,15 @@ async def extract_async( classification_result=classification_result, ) - operation_id = await self._start_extraction_async( - project_id=project_id, - project_type=project_type, - tag=tag, - document_type_id=document_type_id, - document_id=document_id, - ) + operation_id = ( + await self._start_extraction_async( + project_id=project_id, + project_type=project_type, + tag=tag, + document_type_id=document_type_id, + document_id=document_id, + ) + ).operation_id return await self._wait_for_extraction_async( project_id=project_id, diff --git a/src/uipath/platform/documents/documents.py b/src/uipath/platform/documents/documents.py index 7b937447f..ea07c395a 100644 --- a/src/uipath/platform/documents/documents.py +++ 
b/src/uipath/platform/documents/documents.py @@ -53,6 +53,7 @@ class FieldValueProjection(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) id: str @@ -70,6 +71,7 @@ class FieldGroupValueProjection(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) field_group_name: str = Field(alias="fieldGroupName") @@ -82,6 +84,7 @@ class ExtractionResult(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) document_id: str = Field(alias="DocumentId") @@ -108,6 +111,7 @@ class ExtractionResponse(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) extraction_result: ExtractionResult = Field(alias="extractionResult") @@ -141,6 +145,7 @@ class ValidationAction(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) action_data: dict[str, Any] = Field(alias="actionData") @@ -169,6 +174,7 @@ class Reference(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) text_start_index: int = Field(alias="TextStartIndex") @@ -182,6 +188,7 @@ class DocumentBounds(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) start_page: int = Field(alias="StartPage") @@ -208,6 +215,7 @@ class ClassificationResult(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) document_id: str = Field(alias="DocumentId") @@ -228,8 +236,31 @@ class ClassificationResponse(BaseModel): model_config = ConfigDict( serialize_by_alias=True, validate_by_alias=True, + validate_by_name=True, ) classification_results: List[ClassificationResult] = Field( alias="classificationResults" ) + + +class StartExtractionResponse(BaseModel): + """A 
model representing the response from starting an extraction process. + + Attributes: + operation_id (str): The ID of the extraction operation, used to poll for results. + document_id (str): The ID of the digitized document. + project_id (str): The ID of the project. + tag (str, optional): The tag of the published project version. + """ + + model_config = ConfigDict( + serialize_by_alias=True, + validate_by_alias=True, + validate_by_name=True, + ) + + operation_id: str = Field(alias="operationId") + document_id: str = Field(alias="documentId") + project_id: str = Field(alias="projectId") + tag: str | None = Field(default=None) diff --git a/src/uipath/platform/errors/__init__.py b/src/uipath/platform/errors/__init__.py index 126c192c8..dbb403395 100644 --- a/src/uipath/platform/errors/__init__.py +++ b/src/uipath/platform/errors/__init__.py @@ -9,6 +9,7 @@ - UnsupportedDataSourceException: Raised when an operation is attempted on an unsupported data source type - IngestionInProgressException: Raised when a search is attempted on an index during ingestion - BatchTransformNotCompleteException: Raised when attempting to get results from an incomplete batch transform +- ExtractionNotCompleteException: Raised when attempting to get results from an incomplete IXP extraction - EnrichedException: Enriched HTTP error with detailed request/response information """ @@ -17,6 +18,7 @@ from ._enriched_exception import EnrichedException from ._folder_not_found_exception import FolderNotFoundException from ._ingestion_in_progress_exception import IngestionInProgressException +from ._ixp_extraction_not_complete_exception import ExtractionNotCompleteException from ._secret_missing_error import SecretMissingError from ._unsupported_data_source_exception import UnsupportedDataSourceException @@ -26,6 +28,7 @@ "EnrichedException", "FolderNotFoundException", "IngestionInProgressException", + "ExtractionNotCompleteException", "SecretMissingError", "UnsupportedDataSourceException", ] diff --git
a/src/uipath/platform/errors/_ixp_extraction_not_complete_exception.py b/src/uipath/platform/errors/_ixp_extraction_not_complete_exception.py new file mode 100644 index 000000000..f36da1771 --- /dev/null +++ b/src/uipath/platform/errors/_ixp_extraction_not_complete_exception.py @@ -0,0 +1,14 @@ +class ExtractionNotCompleteException(Exception): + """Raised when attempting to get results from an incomplete IXP extraction. + + This exception is raised when attempting to retrieve results from an IXP + extraction operation that has not yet completed successfully. + """ + + def __init__(self, operation_id: str, status: str): + self.operation_id = operation_id + self.status = status + self.message = ( + f"IXP extraction '{operation_id}' is not complete. Current status: {status}" + ) + super().__init__(self.message) diff --git a/src/uipath/platform/orchestrator/_folder_service.py b/src/uipath/platform/orchestrator/_folder_service.py index c49844a50..5fbb0447e 100644 --- a/src/uipath/platform/orchestrator/_folder_service.py +++ b/src/uipath/platform/orchestrator/_folder_service.py @@ -6,6 +6,7 @@ from ...tracing import traced from ..common import BaseService, UiPathApiConfig, UiPathExecutionContext from ..errors import FolderNotFoundException +from .folder import PersonalWorkspace class FolderService(BaseService): @@ -167,3 +168,53 @@ def _retrieve_spec( "take": take, }, ) + + @traced(name="folder_get_personal_workspace", run_type="uipath") + def get_personal_workspace(self) -> PersonalWorkspace: + """Retrieve the personal workspace folder for the current user. + + Returns: + PersonalWorkspace: The personal workspace information. + + Raises: + ValueError: If the user does not have a personal workspace. 
+ """ + response = self.request( + "GET", + url=Endpoint( + "orchestrator_/odata/Users/UiPath.Server.Configuration.OData.GetCurrentUserExtended" + ), + params={"$select": "PersonalWorkspace", "$expand": "PersonalWorkspace"}, + ).json() + + personal_workspace = response.get("PersonalWorkspace") + if personal_workspace is None: + raise ValueError("Failed to fetch personal workspace") + + return PersonalWorkspace.model_validate(personal_workspace) + + @traced(name="folder_get_personal_workspace_async", run_type="uipath") + async def get_personal_workspace_async(self) -> PersonalWorkspace: + """Asynchronously retrieve the personal workspace folder for the current user. + + Returns: + PersonalWorkspace: The personal workspace information. + + Raises: + ValueError: If the personal workspace cannot be fetched. + """ + response = ( + await self.request_async( + "GET", + url=Endpoint( + "orchestrator_/odata/Users/UiPath.Server.Configuration.OData.GetCurrentUserExtended" + ), + params={"$select": "PersonalWorkspace", "$expand": "PersonalWorkspace"}, + ) + ).json() + + personal_workspace = response.get("PersonalWorkspace") + if personal_workspace is None: + raise ValueError("Failed to fetch personal workspace") + + return PersonalWorkspace.model_validate(personal_workspace) diff --git a/src/uipath/platform/orchestrator/folder.py b/src/uipath/platform/orchestrator/folder.py new file mode 100644 index 000000000..8d9a91c9e --- /dev/null +++ b/src/uipath/platform/orchestrator/folder.py @@ -0,0 +1,15 @@ +"""Models for Orchestrator Folders API responses.""" + +from pydantic import BaseModel, ConfigDict, Field + + +class PersonalWorkspace(BaseModel): + """Represents a user's personal workspace folder.""" + + model_config = ConfigDict( + populate_by_name=True, + ) + + fully_qualified_name: str = Field(alias="FullyQualifiedName") + key: str = Field(alias="Key") + id: int = Field(alias="Id") diff --git a/tests/sdk/services/test_documents_service.py 
b/tests/sdk/services/test_documents_service.py index f79465d18..9f0cb1cd8 100644 --- a/tests/sdk/services/test_documents_service.py +++ b/tests/sdk/services/test_documents_service.py @@ -1,5 +1,6 @@ import json from pathlib import Path +from typing import Any from unittest.mock import Mock, patch from uuid import UUID, uuid4 @@ -18,11 +19,18 @@ from uipath.platform.documents._documents_service import ( # type: ignore[attr-defined] DocumentsService, ) +from uipath.platform.errors import ExtractionNotCompleteException @pytest.fixture -def service(config: UiPathApiConfig, execution_context: UiPathExecutionContext): - return DocumentsService(config=config, execution_context=execution_context) +def service( + config: UiPathApiConfig, + execution_context: UiPathExecutionContext, +): + return DocumentsService( + config=config, + execution_context=execution_context, + ) @pytest.fixture @@ -1836,3 +1844,170 @@ async def mock_result_getter_async(): wait_statuses=["NotStarted", "Running"], success_status="Succeeded", ) + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_start_ixp_extraction( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + ): + # ARRANGE + project_id = str(uuid4()) + document_id = str(uuid4()) + operation_id = str(uuid4()) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=IXP", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + json={ + "projects": [ + {"id": project_id, "name": "TestProjectIXP"}, + ] + }, + ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/start?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + match_files={"File": b"test content"}, + json={"documentId": document_id}, + ) + + 
httpx_mock.add_response( + method="POST", + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{UUID(int=0)}/extraction/start?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + match_json={"documentId": document_id}, + json={"operationId": operation_id}, + ) + + # ACT + if mode == "async": + response = await service.start_ixp_extraction_async( + project_name="TestProjectIXP", + tag="staging", + file=b"test content", + ) + else: + response = service.start_ixp_extraction( + project_name="TestProjectIXP", + tag="staging", + file=b"test content", + ) + + # ASSERT + assert response.operation_id == operation_id + assert response.document_id == document_id + assert response.project_id == project_id + assert response.tag == "staging" + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_start_ixp_extraction_invalid_parameters( + self, + service: DocumentsService, + mode: str, + ): + # ACT & ASSERT + with pytest.raises( + ValueError, + match="Exactly one of `file, file_path` must be provided", + ): + if mode == "async": + await service.start_ixp_extraction_async( + project_name="TestProject", + tag="staging", + ) + else: + service.start_ixp_extraction( + project_name="TestProject", + tag="staging", + ) + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_retrieve_ixp_extraction_result_success( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + ixp_extraction_response: dict[str, Any], + mode: str, + ): + # ARRANGE + project_id = str(uuid4()) + operation_id = str(uuid4()) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{UUID(int=0)}/extraction/result/{operation_id}?api-version=1.1", + status_code=200, + 
match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + json={"status": "Succeeded", "result": ixp_extraction_response}, + ) + + # ACT + if mode == "async": + response = await service.retrieve_ixp_extraction_result_async( + project_id=project_id, + tag="staging", + operation_id=operation_id, + ) + else: + response = service.retrieve_ixp_extraction_result( + project_id=project_id, + tag="staging", + operation_id=operation_id, + ) + + # ASSERT + assert response.project_id == project_id + assert response.tag == "staging" + + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_retrieve_ixp_extraction_result_not_complete( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + ): + # ARRANGE + project_id = str(uuid4()) + operation_id = str(uuid4()) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{UUID(int=0)}/extraction/result/{operation_id}?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + json={"status": "Running"}, + ) + + # ACT & ASSERT + with pytest.raises(ExtractionNotCompleteException) as exc_info: + if mode == "async": + await service.retrieve_ixp_extraction_result_async( + project_id=project_id, + tag="staging", + operation_id=operation_id, + ) + else: + service.retrieve_ixp_extraction_result( + project_id=project_id, + tag="staging", + operation_id=operation_id, + ) + + assert exc_info.value.operation_id == operation_id + assert exc_info.value.status == "Running"