Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def __init__(
self._buckets_service = buckets_service
super().__init__(config=config, execution_context=execution_context)

# 2.3.0 prefix trace name with contextgrounding
@traced(name="add_to_index", run_type="uipath")
@resource_override(resource_type="index")
def add_to_index(
Expand Down Expand Up @@ -127,6 +128,7 @@ def add_to_index(
if ingest_data:
self.ingest_data(index, folder_key=folder_key, folder_path=folder_path)

# 2.3.0 prefix trace name with contextgrounding
@traced(name="add_to_index", run_type="uipath")
@resource_override(resource_type="index")
async def add_to_index_async(
Expand Down
2 changes: 2 additions & 0 deletions src/uipath/platform/documents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
FileContent,
ProjectType,
Reference,
StartExtractionResponse,
ValidateClassificationAction,
ValidateExtractionAction,
ValidationAction,
Expand All @@ -41,4 +42,5 @@
"ClassificationResult",
"ClassificationResponse",
"FileContent",
"StartExtractionResponse",
]
241 changes: 221 additions & 20 deletions src/uipath/platform/documents/_documents_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ..._utils import Endpoint
from ...tracing import traced
from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext
from ..errors import ExtractionNotCompleteException
from .documents import (
ActionPriority,
ClassificationResponse,
Expand All @@ -17,6 +18,7 @@
ExtractionResponseIXP,
FileContent,
ProjectType,
StartExtractionResponse,
ValidateClassificationAction,
ValidateExtractionAction,
)
Expand Down Expand Up @@ -119,7 +121,9 @@ class DocumentsService(FolderContext, BaseService):
"""

def __init__(
self, config: UiPathApiConfig, execution_context: UiPathExecutionContext
self,
config: UiPathApiConfig,
execution_context: UiPathExecutionContext,
) -> None:
super().__init__(config=config, execution_context=execution_context)

Expand Down Expand Up @@ -433,7 +437,7 @@ def _start_extraction(
tag: Optional[str],
document_type_id: str,
document_id: str,
) -> str:
) -> StartExtractionResponse:
if project_type == ProjectType.PRETRAINED:
url = Endpoint(
f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start"
Expand All @@ -443,22 +447,29 @@ def _start_extraction(
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
)

return self.request(
operation_id = self.request(
"POST",
url=url,
params={"api-version": 1.1},
headers=self._get_common_headers(),
json={"documentId": document_id},
).json()["operationId"]

return StartExtractionResponse(
operation_id=operation_id,
document_id=document_id,
project_id=project_id,
tag=tag,
)

async def _start_extraction_async(
self,
project_id: str,
project_type: ProjectType,
tag: Optional[str],
document_type_id: str,
document_id: str,
) -> str:
) -> StartExtractionResponse:
if project_type == ProjectType.PRETRAINED:
url = Endpoint(
f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start"
Expand All @@ -468,16 +479,21 @@ async def _start_extraction_async(
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
)

return (
await self.request_async(
"POST",
url=url,
params={"api-version": 1.1},
headers=self._get_common_headers(),
json={"documentId": document_id},
)
operation_id = self.request(
"POST",
url=url,
params={"api-version": 1.1},
headers=self._get_common_headers(),
json={"documentId": document_id},
).json()["operationId"]

return StartExtractionResponse(
operation_id=operation_id,
document_id=document_id,
project_id=project_id,
tag=tag,
)

def _wait_for_operation(
self,
result_getter: Callable[[], Tuple[Any, Optional[Any], Optional[Any]]],
Expand Down Expand Up @@ -887,6 +903,189 @@ async def classify_async(
operation_id=operation_id,
)

@traced(name="documents_start_ixp_extraction", run_type="uipath")
def start_ixp_extraction(
self,
project_name: str,
tag: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
) -> StartExtractionResponse:
"""Start an IXP extraction process without waiting for results (non-blocking).

This method uploads the file as an attachment and starts the extraction process,
returning immediately without waiting for the extraction to complete.
Use this for async workflows where you want to receive results via callback/webhook.

Args:
project_name (str): Name of the IXP project.
tag (str): Tag of the published project version (e.g., "staging").
file (FileContent, optional): The document file to be processed.
file_path (str, optional): Path to the document file to be processed.

Note:
Either `file` or `file_path` must be provided, but not both.

Returns:
ExtractionStartResponse: Contains the operation_id, document_id, project_id, and tag

Examples:
```python
start_response = uipath.documents.start_ixp_extraction(
project_name="MyIXPProjectName",
tag="staging",
file_path="path/to/document.pdf",
)
# start_response.operation_id can be used to poll for results later
```
"""
_exactly_one_must_be_provided(file=file, file_path=file_path)

project_id = self._get_project_id_by_name(project_name, ProjectType.IXP)

document_id = self._start_digitization(
project_id=project_id,
file=file,
file_path=file_path,
)

return self._start_extraction(
project_id=project_id,
project_type=ProjectType.IXP,
tag=tag,
document_type_id=str(UUID(int=0)),
document_id=document_id,
)

@traced(name="documents_start_ixp_extraction_async", run_type="uipath")
async def start_ixp_extraction_async(
self,
project_name: str,
tag: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
) -> StartExtractionResponse:
"""Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method."""
_exactly_one_must_be_provided(file=file, file_path=file_path)

project_id = await self._get_project_id_by_name_async(
project_name, ProjectType.IXP
)

document_id = await self._start_digitization_async(
project_id=project_id,
file=file,
file_path=file_path,
)

return await self._start_extraction_async(
project_id=project_id,
project_type=ProjectType.IXP,
tag=tag,
document_type_id=str(UUID(int=0)),
document_id=document_id,
)

@traced(name="documents_retrieve_ixp_extraction_result", run_type="uipath")
def retrieve_ixp_extraction_result(
self,
project_id: str,
tag: str,
operation_id: str,
) -> ExtractionResponseIXP:
"""Retrieve the result of an IXP extraction operation (single-shot, non-blocking).

This method retrieves the result of an IXP extraction that was previously started
with `start_ixp_extraction`. It does not poll - it makes a single request and
returns the result if available, or raises an exception if not complete.

Args:
project_id (str): The ID of the IXP project.
tag (str): The tag of the published project version.
operation_id (str): The operation ID returned from `start_ixp_extraction`.

Returns:
ExtractionResponseIXP: The extraction response containing the extracted data.

Raises:
IxpExtractionNotCompleteException: If the extraction is not yet complete.

Examples:
```python
# After receiving a callback/webhook that extraction is complete:
result = service.retrieve_ixp_extraction_result(
project_id=start_response.project_id,
tag=start_response.tag,
operation_id=start_response.operation_id,
)
```
"""
document_type_id = str(UUID(int=0))

url = Endpoint(
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
)

result = self.request(
method="GET",
url=url,
params={"api-version": "1.1"},
headers=self._get_common_headers(),
).json()

status = result.get("status")
if status in ["NotStarted", "Running"]:
raise ExtractionNotCompleteException(
operation_id=operation_id,
status=status,
)

extraction_response = result.get("result")
extraction_response["projectId"] = project_id
extraction_response["tag"] = tag
extraction_response["documentTypeId"] = document_type_id
extraction_response["projectType"] = ProjectType.IXP

return ExtractionResponseIXP.model_validate(extraction_response)

@traced(name="documents_retrieve_ixp_extraction_result_async", run_type="uipath")
async def retrieve_ixp_extraction_result_async(
self,
project_id: str,
tag: str,
operation_id: str,
) -> ExtractionResponseIXP:
"""Asynchronous version of the [`retrieve_ixp_extraction_result`][uipath.platform.documents._documents_service.DocumentsService.retrieve_ixp_extraction_result] method."""
document_type_id = str(UUID(int=0))

url = Endpoint(
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
)

result = (
await self.request_async(
method="GET",
url=url,
params={"api-version": "1.1"},
headers=self._get_common_headers(),
)
).json()

status = result.get("status")
if status in ["NotStarted", "Running"]:
raise ExtractionNotCompleteException(
operation_id=operation_id,
status=status,
)

extraction_response = result.get("result")
extraction_response["projectId"] = project_id
extraction_response["tag"] = tag
extraction_response["documentTypeId"] = document_type_id
extraction_response["projectType"] = ProjectType.IXP

return ExtractionResponseIXP.model_validate(extraction_response)

@traced(name="documents_extract", run_type="uipath")
def extract(
self,
Expand Down Expand Up @@ -989,7 +1188,7 @@ def extract(
tag=tag,
document_type_id=document_type_id,
document_id=document_id,
)
).operation_id

return self._wait_for_extraction(
project_id=project_id,
Expand Down Expand Up @@ -1042,13 +1241,15 @@ async def extract_async(
classification_result=classification_result,
)

operation_id = await self._start_extraction_async(
project_id=project_id,
project_type=project_type,
tag=tag,
document_type_id=document_type_id,
document_id=document_id,
)
operation_id = (
await self._start_extraction_async(
project_id=project_id,
project_type=project_type,
tag=tag,
document_type_id=document_type_id,
document_id=document_id,
)
).operation_id

return await self._wait_for_extraction_async(
project_id=project_id,
Expand Down
Loading