diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index c412e97..52b3e83 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.29"
+  ".": "0.1.0-alpha.30"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 4739e91..2e71139 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 55
-openapi_spec_hash: b54b36ebcaf88c1ddb6d51d24da75420
+openapi_spec_hash: c894ce3fb9db92c69816f06896e30067
 config_hash: 48c3812186c899cdef23cc8de76bd2aa
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a3238c..9cab6a3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog

+## 0.1.0-alpha.30 (2025-10-14)
+
+Full Changelog: [v0.1.0-alpha.29...v0.1.0-alpha.30](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.29...v0.1.0-alpha.30)
+
+### Features
+
+* **api:** api update ([5116aea](https://github.com/cleanlab/codex-python/commit/5116aea9d162458af9e46e3ace3d0d6c58d5ae2d))
+* **api:** api update ([f62cb7c](https://github.com/cleanlab/codex-python/commit/f62cb7c8826d73bac8bbb047093bdaa41749e8da))
+* **api:** api update ([cb2c6ed](https://github.com/cleanlab/codex-python/commit/cb2c6ed08eb4286ec580cc0e48c7954657d48df1))
+* **api:** api update ([c515d78](https://github.com/cleanlab/codex-python/commit/c515d7888f17019a2307e47520a39790cd9d0209))
+
+
+### Chores
+
+* **internal:** detect missing future annotations with ruff ([8e2cc28](https://github.com/cleanlab/codex-python/commit/8e2cc28bac8208d5b0eaa39fcc03248188af076a))
+
 ## 0.1.0-alpha.29 (2025-10-06)

 Full Changelog: [v0.1.0-alpha.28...v0.1.0-alpha.29](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.28...v0.1.0-alpha.29)
diff --git a/pyproject.toml b/pyproject.toml
index 1a7d218..4e0140e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "codex-sdk"
-version = "0.1.0-alpha.29"
+version = "0.1.0-alpha.30"
 description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
 dynamic = ["readme"]
 license = "MIT"
@@ -224,6 +224,8 @@ select = [
   "B",
   # remove unused imports
   "F401",
+  # check for missing future annotations
+  "FA102",
   # bare except statements
   "E722",
   # unused arguments
@@ -246,6 +248,8 @@ unfixable = [
   "T203",
 ]

+extend-safe-fixes = ["FA102"]
+
 [tool.ruff.lint.flake8-tidy-imports.banned-api]
 "functools.lru_cache".msg = "This function does not retain type information for the wrapped function's arguments; The `lru_cache` function from `_utils` should be used instead"
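Note on the ruff configuration above: FA102 ("future-required-type-annotation") flags PEP 604 union syntax such as `float | None` in annotations when the target Python version predates 3.10 and the module lacks `from __future__ import annotations`; listing it under `extend-safe-fixes` lets `ruff check --fix` insert that import automatically. A minimal illustration (hypothetical module, not a file in this repo):

# hypothetical_module.py -- illustration only
from __future__ import annotations  # this is the line FA102's autofix inserts

import httpx


def fetch(timeout: float | None = None) -> httpx.Response | None:
    # Without the __future__ import above, evaluating `float | None` in an
    # annotation raises TypeError at import time on Python 3.8/3.9 targets,
    # which is exactly what FA102 guards against.
    ...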
__title__ = "codex" -__version__ = "0.1.0-alpha.29" # x-release-please-version +__version__ = "0.1.0-alpha.30" # x-release-please-version diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index 4575e8a..e94a6d3 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -564,8 +564,9 @@ def retrieve_analytics( self, project_id: str, *, - end: int | Omit = omit, - start: int | Omit = omit, + end: Optional[int] | Omit = omit, + metadata_filters: Optional[str] | Omit = omit, + start: Optional[int] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -574,12 +575,46 @@ def retrieve_analytics( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ProjectRetrieveAnalyticsResponse: """ - Get Project Analytics Route + Retrieve analytics data for a project including queries, bad responses, and + answers published. + + **Metadata Filtering:** + - Filter by custom metadata fields using key-value pairs + - Supports single values: `{"department": "Engineering"}` + - Supports multiple values: `{"priority": ["high", "medium"]}` + - Supports null/missing values: `{"department": []}` or `{"department": [null]}` + + **Available Metadata Fields:** + - Only metadata keys that exist on query logs are returned in `metadata_fields` + - Fields with ≤12 unique values show as "select" type with checkbox options + - Fields with >12 unique values show as "input" type for text search + - Fields with no data are excluded from the response entirely + + **Null Value Behavior:** + - Empty arrays `[]` are automatically converted to `[null]` to filter for records where the metadata field is missing or null + - Use `[null]` explicitly to filter for records where the field is missing or null + - Use `["value1", null, "value2"]` to include both specific values and null values + - Records match if the metadata field is null, missing from custom_metadata, or custom_metadata itself is null + + **Date Filtering:** + - Provide `start` only: filter logs created at or after this timestamp + - Provide `end` only: filter logs created at or before this timestamp + - Provide both: filter logs created within the time range + - Provide neither: include all logs regardless of creation time Args: - end: End timestamp in seconds since epoch + end: Filter logs created at or before this timestamp (epoch seconds). Can be used + alone for upper-bound filtering. - start: Start timestamp in seconds since epoch + metadata_filters: + Metadata filters as JSON string. Examples: + - Single value: '{"department": "Engineering"}' + - Multiple values: '{"priority": ["high", "medium"]}' + - Null/missing values: '{"department": []}' or '{"department": [null]}' + - Mixed values: '{"status": ["active", null, "pending"]}' + + start: Filter logs created at or after this timestamp (epoch seconds). Can be used + alone for lower-bound filtering. 
          extra_headers: Send extra headers
@@ -601,6 +636,7 @@ def retrieve_analytics(
                 query=maybe_transform(
                     {
                         "end": end,
+                        "metadata_filters": metadata_filters,
                         "start": start,
                     },
                     project_retrieve_analytics_params.ProjectRetrieveAnalyticsParams,
@@ -1301,8 +1337,9 @@ async def retrieve_analytics(
         self,
         project_id: str,
         *,
-        end: int | Omit = omit,
-        start: int | Omit = omit,
+        end: Optional[int] | Omit = omit,
+        metadata_filters: Optional[str] | Omit = omit,
+        start: Optional[int] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -1311,12 +1348,46 @@ async def retrieve_analytics(
         timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ProjectRetrieveAnalyticsResponse:
         """
-        Get Project Analytics Route
+        Retrieve analytics data for a project including queries, bad responses, and
+        answers published.
+
+        **Metadata Filtering:**
+        - Filter by custom metadata fields using key-value pairs
+        - Supports single values: `{"department": "Engineering"}`
+        - Supports multiple values: `{"priority": ["high", "medium"]}`
+        - Supports null/missing values: `{"department": []}` or `{"department": [null]}`
+
+        **Available Metadata Fields:**
+        - Only metadata keys that exist on query logs are returned in `metadata_fields`
+        - Fields with ≤12 unique values show as "select" type with checkbox options
+        - Fields with >12 unique values show as "input" type for text search
+        - Fields with no data are excluded from the response entirely
+
+        **Null Value Behavior:**
+        - Empty arrays `[]` are automatically converted to `[null]` to filter for records where the metadata field is missing or null
+        - Use `[null]` explicitly to filter for records where the field is missing or null
+        - Use `["value1", null, "value2"]` to include both specific values and null values
+        - Records match if the metadata field is null, missing from custom_metadata, or custom_metadata itself is null
+
+        **Date Filtering:**
+        - Provide `start` only: filter logs created at or after this timestamp
+        - Provide `end` only: filter logs created at or before this timestamp
+        - Provide both: filter logs created within the time range
+        - Provide neither: include all logs regardless of creation time

         Args:
-          end: End timestamp in seconds since epoch
+          end: Filter logs created at or before this timestamp (epoch seconds). Can be used
+              alone for upper-bound filtering.
+
+          metadata_filters:
+              Metadata filters as JSON string. Examples:
+              - Single value: '{"department": "Engineering"}'
+              - Multiple values: '{"priority": ["high", "medium"]}'
+              - Null/missing values: '{"department": []}' or '{"department": [null]}'
+              - Mixed values: '{"status": ["active", null, "pending"]}'

-          start: Start timestamp in seconds since epoch
+          start: Filter logs created at or after this timestamp (epoch seconds). Can be used
+              alone for lower-bound filtering.
          extra_headers: Send extra headers
@@ -1338,6 +1409,7 @@ async def retrieve_analytics(
                 query=await async_maybe_transform(
                     {
                         "end": end,
+                        "metadata_filters": metadata_filters,
                         "start": start,
                     },
                     project_retrieve_analytics_params.ProjectRetrieveAnalyticsParams,
diff --git a/src/codex/types/project_detect_response.py b/src/codex/types/project_detect_response.py
index 27044c1..cdee7dc 100644
--- a/src/codex/types/project_detect_response.py
+++ b/src/codex/types/project_detect_response.py
@@ -28,8 +28,6 @@ class EvalScores(BaseModel):
     triggered_guardrail: bool

-    failed: Optional[bool] = None
-
     log: Optional[object] = None
@@ -56,8 +54,11 @@ class ProjectDetectResponse(BaseModel):
     Codex Project, or None otherwise.
     """

-    expert_review_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """

     should_guardrail: bool
     """
diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py
index 9fd609f..5b2d692 100644
--- a/src/codex/types/project_list_response.py
+++ b/src/codex/types/project_list_response.py
@@ -30,6 +30,12 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel):
     how
     """

+    display_name: str
+    """Human-friendly name for display.
+
+    For default evals, prefer standardized labels; otherwise use configured name.
+    """
+
     eval_key: str
     """
     Unique key for eval metric - currently maps to the TrustworthyRAG name property
diff --git a/src/codex/types/project_retrieve_analytics_params.py b/src/codex/types/project_retrieve_analytics_params.py
index c5f9a48..47719ca 100644
--- a/src/codex/types/project_retrieve_analytics_params.py
+++ b/src/codex/types/project_retrieve_analytics_params.py
@@ -2,14 +2,30 @@

 from __future__ import annotations

+from typing import Optional
 from typing_extensions import TypedDict

 __all__ = ["ProjectRetrieveAnalyticsParams"]


 class ProjectRetrieveAnalyticsParams(TypedDict, total=False):
-    end: int
-    """End timestamp in seconds since epoch"""
+    end: Optional[int]
+    """Filter logs created at or before this timestamp (epoch seconds).

-    start: int
-    """Start timestamp in seconds since epoch"""
+    Can be used alone for upper-bound filtering.
+    """
+
+    metadata_filters: Optional[str]
+    """Metadata filters as JSON string.
+
+    Examples: - Single value: '{"department": "Engineering"}' - Multiple values:
+    '{"priority": ["high", "medium"]}' - Null/missing values: '{"department": []}'
+    or '{"department": [null]}' - Mixed values: '{"status": ["active", null,
+    "pending"]}'
+    """
+
+    start: Optional[int]
+    """Filter logs created at or after this timestamp (epoch seconds).
+
+    Can be used alone for lower-bound filtering.
+    """
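A usage sketch for the new `metadata_filters` parameter (assumes the generated `Codex` client with credentials picked up from the environment; the project ID is the placeholder UUID used in the tests, and the timestamps are arbitrary):

import json

from codex import Codex

client = Codex()

analytics = client.projects.retrieve_analytics(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    start=1728000000,  # epoch seconds, optional lower bound
    end=1730592000,  # epoch seconds, optional upper bound
    # metadata_filters is a JSON-encoded string, not a dict:
    metadata_filters=json.dumps({"department": "Engineering", "priority": ["high", "medium"]}),
)

print(analytics.queries.total)
for field in analytics.metadata_fields or []:
    # field_type is "select" (≤12 unique values) or "input" (more than 12)
    print(field.key, field.field_type, field.values)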
diff --git a/src/codex/types/project_retrieve_analytics_response.py b/src/codex/types/project_retrieve_analytics_response.py
index b1e5d85..77110c2 100644
--- a/src/codex/types/project_retrieve_analytics_response.py
+++ b/src/codex/types/project_retrieve_analytics_response.py
@@ -1,6 +1,7 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

-from typing import Dict, List
+from typing import Dict, List, Optional
+from typing_extensions import Literal

 from .._models import BaseModel
@@ -11,6 +12,7 @@
     "BadResponses",
     "BadResponsesResponsesByType",
     "Queries",
+    "MetadataField",
 ]
@@ -44,9 +46,27 @@ class Queries(BaseModel):
     total: int


+class MetadataField(BaseModel):
+    field_type: Literal["select", "input"]
+    """Field type: 'select' for checkbox selection, 'input' for text input"""
+
+    key: str
+    """Metadata field key"""
+
+    values: Optional[List[Optional[str]]] = None
+    """Possible values for this metadata field (None if more than 12 values).
+
+    Array elements may include null to represent logs where the metadata key is
+    missing or null.
+    """
+
+
 class ProjectRetrieveAnalyticsResponse(BaseModel):
     answers_published: AnswersPublished

     bad_responses: BadResponses

     queries: Queries
+
+    metadata_fields: Optional[List[MetadataField]] = None
+    """Available metadata fields for filtering"""
diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py
index 949ce25..399ddd9 100644
--- a/src/codex/types/project_retrieve_response.py
+++ b/src/codex/types/project_retrieve_response.py
@@ -28,6 +28,12 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel):
     how
     """

+    display_name: str
+    """Human-friendly name for display.
+
+    For default evals, prefer standardized labels; otherwise use configured name.
+    """
+
     eval_key: str
     """
     Unique key for eval metric - currently maps to the TrustworthyRAG name property
diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py
index cf2f3a8..923de6b 100644
--- a/src/codex/types/project_return_schema.py
+++ b/src/codex/types/project_return_schema.py
@@ -28,6 +28,12 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel):
     how
     """

+    display_name: str
+    """Human-friendly name for display.
+
+    For default evals, prefer standardized labels; otherwise use configured name.
+    """
+
     eval_key: str
     """
     Unique key for eval metric - currently maps to the TrustworthyRAG name property
diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py
index 1c0cc4a..458e4fc 100644
--- a/src/codex/types/project_validate_response.py
+++ b/src/codex/types/project_validate_response.py
@@ -28,8 +28,6 @@ class EvalScores(BaseModel):
     triggered_guardrail: bool

-    failed: Optional[bool] = None
-
     log: Optional[object] = None
@@ -56,14 +54,10 @@ class ProjectValidateResponse(BaseModel):
     Codex Project, or None otherwise.
     """

-    expert_review_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
-    is_bad_response: bool
-    """True if the response is flagged as potentially bad, False otherwise.
-
-    When True, a lookup is performed, which logs this query in the project for SMEs
-    to answer, if it does not already exist.
+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
     """

     log_id: str
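The renamed `expert_guardrail_override_explanation` field (replacing `expert_review_guardrail_explanation` on both the detect and validate responses) now explains the final decision in either direction. A rough sketch of consuming it, assuming `result` is a `ProjectValidateResponse` obtained from a validate call elsewhere:

from codex.types.project_validate_response import ProjectValidateResponse


def describe_guardrail_decision(result: ProjectValidateResponse) -> str:
    # should_guardrail carries the final decision; when an expert review exists,
    # it overrides the original guardrail outcome and supplies the explanation.
    decision = "guardrailed" if result.should_guardrail else "not guardrailed"
    if result.expert_guardrail_override_explanation is not None:
        return f"{decision} (expert override: {result.expert_guardrail_override_explanation})"
    return f"{decision} (log_id={result.log_id})"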
diff --git a/src/codex/types/projects/eval_list_response.py b/src/codex/types/projects/eval_list_response.py
index 073312d..572de97 100644
--- a/src/codex/types/projects/eval_list_response.py
+++ b/src/codex/types/projects/eval_list_response.py
@@ -15,6 +15,12 @@ class Eval(BaseModel):
     how
     """

+    display_name: str
+    """Human-friendly name for display.
+
+    For default evals, prefer standardized labels; otherwise use configured name.
+    """
+
     eval_key: str
     """
     Unique key for eval metric - currently maps to the TrustworthyRAG name property
diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py
index fc2e904..632a375 100644
--- a/src/codex/types/projects/query_log_list_by_group_response.py
+++ b/src/codex/types/projects/query_log_list_by_group_response.py
@@ -50,24 +50,32 @@ class QueryLogsByGroupQueryLogFormattedEscalationEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogsByGroupQueryLogFormattedEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogsByGroupQueryLogFormattedGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]
@@ -384,6 +392,9 @@ class QueryLogsByGroupQueryLog(BaseModel):
     escalation_evals: Optional[List[str]] = None
     """Evals that should trigger escalation to SME"""

+    eval_display_names: Optional[Dict[str, str]] = None
+    """Mapping of eval keys to display names at time of creation"""
+
     eval_issue_labels: Optional[List[str]] = None
     """Labels derived from evaluation scores"""
@@ -402,6 +413,18 @@
     Used to log tool calls in the query log.
     """

+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """
+
+    expert_override_log_id: Optional[str] = None
+    """
+    ID of the query log with expert review that overrode the original guardrail
+    decision.
+    """
+
     expert_review_created_at: Optional[datetime] = None
     """When the expert review was created"""
@@ -430,6 +453,12 @@
     itself.
     """

+    original_assistant_response: Optional[str] = None
+    """The original assistant response that would have been displayed to the user.
+
+    This may be `None` if this is a tool call step.
+    """
+
     original_question: Optional[str] = None
     """The original question that was asked before any rewriting or processing.
@@ -443,9 +472,6 @@
     primary_eval_issue_score: Optional[float] = None
     """Score of the primary eval issue"""

-    similar_query_log_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
     tools: Optional[List[QueryLogsByGroupQueryLogTool]] = None
     """Tools to use for the LLM call.
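The new `display_name` / `eval_display_names` fields give consumers human-friendly labels for eval keys. A small sketch (assumes a `QueryLogListResponse` item obtained from the query-log listing endpoint; whether every key in `escalation_evals` has a mapping is an assumption, hence the fallback):

from typing import List

from codex.types.projects.query_log_list_response import QueryLogListResponse


def escalation_eval_labels(log: QueryLogListResponse) -> List[str]:
    # eval_display_names maps eval keys to the display names captured when the
    # log was created; fall back to the raw key when no mapping is present.
    names = log.eval_display_names or {}
    return [names.get(key, key) for key in (log.escalation_evals or [])]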
diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py
index e61b651..3d894f1 100644
--- a/src/codex/types/projects/query_log_list_groups_response.py
+++ b/src/codex/types/projects/query_log_list_groups_response.py
@@ -47,24 +47,32 @@ class FormattedEscalationEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedNonGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]
@@ -379,6 +387,9 @@ class QueryLogListGroupsResponse(BaseModel):
     escalation_evals: Optional[List[str]] = None
     """Evals that should trigger escalation to SME"""

+    eval_display_names: Optional[Dict[str, str]] = None
+    """Mapping of eval keys to display names at time of creation"""
+
     eval_issue_labels: Optional[List[str]] = None
     """Labels derived from evaluation scores"""
@@ -397,6 +408,18 @@
     Used to log tool calls in the query log.
     """

+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """
+
+    expert_override_log_id: Optional[str] = None
+    """
+    ID of the query log with expert review that overrode the original guardrail
+    decision.
+    """
+
     expert_review_created_at: Optional[datetime] = None
     """When the expert review was created"""
@@ -425,6 +448,12 @@
     itself.
     """

+    original_assistant_response: Optional[str] = None
+    """The original assistant response that would have been displayed to the user.
+
+    This may be `None` if this is a tool call step.
+    """
+
     original_question: Optional[str] = None
     """The original question that was asked before any rewriting or processing.
@@ -438,9 +467,6 @@
     primary_eval_issue_score: Optional[float] = None
     """Score of the primary eval issue"""

-    similar_query_log_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
     tools: Optional[List[Tool]] = None
     """Tools to use for the LLM call.
diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py
index 6494d83..8e57871 100644
--- a/src/codex/types/projects/query_log_list_response.py
+++ b/src/codex/types/projects/query_log_list_response.py
@@ -47,24 +47,32 @@ class FormattedEscalationEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedNonGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]
@@ -367,6 +375,9 @@ class QueryLogListResponse(BaseModel):
     escalation_evals: Optional[List[str]] = None
     """Evals that should trigger escalation to SME"""

+    eval_display_names: Optional[Dict[str, str]] = None
+    """Mapping of eval keys to display names at time of creation"""
+
     eval_issue_labels: Optional[List[str]] = None
     """Labels derived from evaluation scores"""
@@ -385,6 +396,18 @@
     Used to log tool calls in the query log.
     """

+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """
+
+    expert_override_log_id: Optional[str] = None
+    """
+    ID of the query log with expert review that overrode the original guardrail
+    decision.
+    """
+
     expert_review_created_at: Optional[datetime] = None
     """When the expert review was created"""
@@ -410,6 +433,12 @@
     itself.
     """

+    original_assistant_response: Optional[str] = None
+    """The original assistant response that would have been displayed to the user.
+
+    This may be `None` if this is a tool call step.
+    """
+
     original_question: Optional[str] = None
     """The original question that was asked before any rewriting or processing.
@@ -423,9 +452,6 @@
     primary_eval_issue_score: Optional[float] = None
     """Score of the primary eval issue"""

-    similar_query_log_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
     tools: Optional[List[Tool]] = None
     """Tools to use for the LLM call.
diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py
index abfaeeb..ef1aee6 100644
--- a/src/codex/types/projects/query_log_retrieve_response.py
+++ b/src/codex/types/projects/query_log_retrieve_response.py
@@ -47,24 +47,32 @@ class FormattedEscalationEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class FormattedNonGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]
@@ -371,6 +379,9 @@ class QueryLogRetrieveResponse(BaseModel):
     escalation_evals: Optional[List[str]] = None
     """Evals that should trigger escalation to SME"""

+    eval_display_names: Optional[Dict[str, str]] = None
+    """Mapping of eval keys to display names at time of creation"""
+
     eval_issue_labels: Optional[List[str]] = None
     """Labels derived from evaluation scores"""
@@ -389,6 +400,18 @@
     Used to log tool calls in the query log.
     """

+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """
+
+    expert_override_log_id: Optional[str] = None
+    """
+    ID of the query log with expert review that overrode the original guardrail
+    decision.
+    """
+
     expert_review_created_at: Optional[datetime] = None
     """When the expert review was created"""
@@ -417,6 +440,12 @@
     itself.
     """

+    original_assistant_response: Optional[str] = None
+    """The original assistant response that would have been displayed to the user.
+
+    This may be `None` if this is a tool call step.
+    """
+
     original_question: Optional[str] = None
     """The original question that was asked before any rewriting or processing.
@@ -430,9 +459,6 @@
     primary_eval_issue_score: Optional[float] = None
     """Score of the primary eval issue"""

-    similar_query_log_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
     tools: Optional[List[Tool]] = None
     """Tools to use for the LLM call.
diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py
index e2239c4..f85be78 100644
--- a/src/codex/types/projects/remediation_list_resolved_logs_response.py
+++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py
@@ -48,24 +48,32 @@ class QueryLogFormattedEscalationEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogFormattedEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogFormattedGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]


 class QueryLogFormattedNonGuardrailEvalScores(BaseModel):
+    display_name: str
+
     score: float

     status: Literal["pass", "fail"]
@@ -374,6 +382,9 @@ class QueryLog(BaseModel):
     escalation_evals: Optional[List[str]] = None
     """Evals that should trigger escalation to SME"""

+    eval_display_names: Optional[Dict[str, str]] = None
+    """Mapping of eval keys to display names at time of creation"""
+
     eval_issue_labels: Optional[List[str]] = None
     """Labels derived from evaluation scores"""
@@ -392,6 +403,18 @@
     Used to log tool calls in the query log.
     """

+    expert_guardrail_override_explanation: Optional[str] = None
+    """
+    Explanation of why the response was either guardrailed or not guardrailed by
+    expert review. Expert review will override the original guardrail decision.
+    """
+
+    expert_override_log_id: Optional[str] = None
+    """
+    ID of the query log with expert review that overrode the original guardrail
+    decision.
+    """
+
     expert_review_created_at: Optional[datetime] = None
     """When the expert review was created"""
@@ -417,6 +440,12 @@
     itself.
     """

+    original_assistant_response: Optional[str] = None
+    """The original assistant response that would have been displayed to the user.
+
+    This may be `None` if this is a tool call step.
+    """
+
     original_question: Optional[str] = None
     """The original question that was asked before any rewriting or processing.
@@ -430,9 +459,6 @@
     primary_eval_issue_score: Optional[float] = None
     """Score of the primary eval issue"""

-    similar_query_log_guardrail_explanation: Optional[str] = None
-    """Explanation from a similar bad query log that caused this to be guardrailed"""
-
     tools: Optional[List[QueryLogTool]] = None
     """Tools to use for the LLM call.
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
index cfbfe41..564da9a 100644
--- a/tests/api_resources/test_projects.py
+++ b/tests/api_resources/test_projects.py
@@ -743,6 +743,7 @@ def test_method_retrieve_analytics_with_all_params(self, client: Codex) -> None:
         project = client.projects.retrieve_analytics(
             project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             end=0,
+            metadata_filters="metadata_filters",
             start=0,
         )
         assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"])
@@ -1630,6 +1631,7 @@ async def test_method_retrieve_analytics_with_all_params(self, async_client: Asy
         project = await async_client.projects.retrieve_analytics(
             project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             end=0,
+            metadata_filters="metadata_filters",
             start=0,
         )
        assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"])
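Finally, a sketch of the null/missing-value filter semantics documented in the docstrings above: `json.dumps` renders Python `None` as JSON `null`, and an empty list is treated server-side as `[null]` (same client and project-ID assumptions as the earlier sketch):

import json

from codex import Codex

client = Codex()

# "department" missing or null; "status" equal to one of two values or missing:
filters = json.dumps({"department": [None], "status": ["active", None, "pending"]})

analytics = client.projects.retrieve_analytics(
    project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
    metadata_filters=filters,
    # start/end omitted: include all logs regardless of creation time
)
print(analytics.queries.total)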