diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6979ebb..40152386 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: run: ./scripts/lint build: - if: github.repository == 'stainless-sdks/codex-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork) + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork timeout-minutes: 10 name: build permissions: @@ -61,12 +61,14 @@ jobs: run: rye build - name: Get GitHub OIDC Token + if: github.repository == 'stainless-sdks/codex-python' id: github-oidc uses: actions/github-script@v6 with: script: core.setOutput('github_token', await core.getIDToken()); - name: Upload tarball + if: github.repository == 'stainless-sdks/codex-python' env: URL: https://pkg.stainless.com/s AUTH: ${{ steps.github-oidc.outputs.github_token }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 380b6f91..3188cedb 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.24" + ".": "0.1.0-alpha.25" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 4f2aa488..b94f7cf2 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 49989625bf633c5fdb3e11140f788f2d +openapi_spec_hash: 7daf4896ba4932714f8fe4fff277d7c7 config_hash: 930284cfa37f835d949c8a1b124f4807 diff --git a/CHANGELOG.md b/CHANGELOG.md index e4f0a42f..51dbaaca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## 0.1.0-alpha.25 (2025-08-22) + +Full Changelog: [v0.1.0-alpha.24...v0.1.0-alpha.25](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.24...v0.1.0-alpha.25) + +### Features + +* **api:** api update ([4bc752b](https://github.com/cleanlab/codex-python/commit/4bc752b640eec68c028e794c9f2c31aed9838e4e)) +* **api:** api update ([d51e2a9](https://github.com/cleanlab/codex-python/commit/d51e2a96fecd86c009beb70094671de889efb0b1)) +* **api:** api update ([d3a1591](https://github.com/cleanlab/codex-python/commit/d3a15910605f36066acb6529614d2e327500006d)) +* **api:** api update ([eddd6b2](https://github.com/cleanlab/codex-python/commit/eddd6b26ab55327c807f57e30f4ec2c9003a97d6)) +* **api:** api update ([9c5d83e](https://github.com/cleanlab/codex-python/commit/9c5d83e1d073b04af320bb53099f7a0540479698)) +* **api:** api update ([bdfdd70](https://github.com/cleanlab/codex-python/commit/bdfdd70d31dc1e83af8507d5c264f7e9e08f2a99)) +* **api:** api update ([70be786](https://github.com/cleanlab/codex-python/commit/70be78674ba9b7f67398efac93bb8c5094616d8f)) +* **client:** support file upload requests ([370bb62](https://github.com/cleanlab/codex-python/commit/370bb62b04ff454dff20252887f3f441f762b2aa)) + + +### Chores + +* **internal:** codegen related update ([04b7858](https://github.com/cleanlab/codex-python/commit/04b785856210a867d07f973d2c53c9dcecd03981)) +* **internal:** codegen related update ([6e1f882](https://github.com/cleanlab/codex-python/commit/6e1f8826a9a8c9867cf758528cf99040cb499c1a)) +* **internal:** fix ruff target version ([6d3b8a2](https://github.com/cleanlab/codex-python/commit/6d3b8a2dc580ca9fddd2ea82147116465373739f)) +* **internal:** update comment in script ([dfc7725](https://github.com/cleanlab/codex-python/commit/dfc7725fab501bc8812028a6eb93ecd4e5b0f4da)) +* update @stainless-api/prism-cli to v5.15.0 ([b46d19e](https://github.com/cleanlab/codex-python/commit/b46d19ec2a1058452eea206a8833413466e8ff73)) +* update 
github action ([5608ea3](https://github.com/cleanlab/codex-python/commit/5608ea3c1e5980d747fb471bd25ada79e0df8c04)) + ## 0.1.0-alpha.24 (2025-07-28) Full Changelog: [v0.1.0-alpha.23...v0.1.0-alpha.24](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.23...v0.1.0-alpha.24) diff --git a/pyproject.toml b/pyproject.toml index a0611005..3209698a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.24" +version = "0.1.0-alpha.25" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" @@ -159,7 +159,7 @@ reportPrivateUsage = false [tool.ruff] line-length = 120 output-format = "grouped" -target-version = "py37" +target-version = "py38" [tool.ruff.format] docstring-code-format = true diff --git a/scripts/mock b/scripts/mock index d2814ae6..0b28f6ea 100755 --- a/scripts/mock +++ b/scripts/mock @@ -21,7 +21,7 @@ echo "==> Starting mock server with URL ${URL}" # Run prism mock on the given spec if [ "$1" == "--daemon" ]; then - npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" &> .prism.log & + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" &> .prism.log & # Wait for server to come online echo -n "Waiting for server" @@ -37,5 +37,5 @@ if [ "$1" == "--daemon" ]; then echo else - npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" + npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock "$URL" fi diff --git a/scripts/test b/scripts/test index 2b878456..dbeda2d2 100755 --- a/scripts/test +++ b/scripts/test @@ -43,7 +43,7 @@ elif ! prism_is_running ; then echo -e "To run the server, pass in the path or url of your OpenAPI" echo -e "spec to the prism command:" echo - echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" + echo -e " \$ ${YELLOW}npm exec --package=@stainless-api/prism-cli@5.15.0 -- prism mock path/to/your.openapi.yml${NC}" echo exit 1 diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py index 6da89f6c..870a4729 100644 --- a/src/codex/_base_client.py +++ b/src/codex/_base_client.py @@ -532,7 +532,10 @@ def _build_request( is_body_allowed = options.method.lower() != "get" if is_body_allowed: - kwargs["json"] = json_data if is_given(json_data) else None + if isinstance(json_data, bytes): + kwargs["content"] = json_data + else: + kwargs["json"] = json_data if is_given(json_data) else None kwargs["files"] = files else: headers.pop("Content-Type", None) diff --git a/src/codex/_files.py b/src/codex/_files.py index 715cc207..cc14c14f 100644 --- a/src/codex/_files.py +++ b/src/codex/_files.py @@ -69,12 +69,12 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes: return file if is_tuple_t(file): - return (file[0], _read_file_content(file[1]), *file[2:]) + return (file[0], read_file_content(file[1]), *file[2:]) raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple") -def _read_file_content(file: FileContent) -> HttpxFileContent: +def read_file_content(file: FileContent) -> HttpxFileContent: if isinstance(file, os.PathLike): return pathlib.Path(file).read_bytes() return file @@ -111,12 +111,12 @@ async def _async_transform_file(file: FileTypes) -> HttpxFileTypes: return file if is_tuple_t(file): - return (file[0], await _async_read_file_content(file[1]), *file[2:]) + return (file[0], await async_read_file_content(file[1]), *file[2:]) raise 
TypeError(f"Expected file types input to be a FileContent type or to be a tuple") -async def _async_read_file_content(file: FileContent) -> HttpxFileContent: +async def async_read_file_content(file: FileContent) -> HttpxFileContent: if isinstance(file, os.PathLike): return await anyio.Path(file).read_bytes() diff --git a/src/codex/_version.py b/src/codex/_version.py index e020cb91..656ce65e 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.24" # x-release-please-version +__version__ = "0.1.0-alpha.25" # x-release-please-version diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index f82bcd03..319097fd 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -352,7 +352,7 @@ def invite_sme( project_id: str, *, email: str, - page_type: Literal["query_log", "remediation"], + page_type: Literal["query_log", "remediation", "prioritized_issue"], url_query_string: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -449,7 +449,7 @@ def validate( context: str, query: str, response: project_validate_params.Response, - use_llm_matching: bool | NotGiven = NOT_GIVEN, + use_llm_matching: Optional[bool] | NotGiven = NOT_GIVEN, constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, custom_metadata: Optional[object] | NotGiven = NOT_GIVEN, @@ -520,60 +520,65 @@ def validate( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. 
+ Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. 
+ num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. 
- use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -925,7 +930,7 @@ async def invite_sme( project_id: str, *, email: str, - page_type: Literal["query_log", "remediation"], + page_type: Literal["query_log", "remediation", "prioritized_issue"], url_query_string: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -1022,7 +1027,7 @@ async def validate( context: str, query: str, response: project_validate_params.Response, - use_llm_matching: bool | NotGiven = NOT_GIVEN, + use_llm_matching: Optional[bool] | NotGiven = NOT_GIVEN, constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, custom_metadata: Optional[object] | NotGiven = NOT_GIVEN, @@ -1093,60 +1098,65 @@ async def validate( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). 
- + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. 
- - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. 
+ The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index 6fa490e8..45277433 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -100,6 +100,7 @@ def list( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, @@ -109,6 +110,7 @@ def list( ] | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -131,10 +133,14 @@ def list( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -162,12 +168,14 @@ def list( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "offset": offset, "order": order, "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -185,6 +193,7 @@ def list_by_group( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, @@ -196,6 +205,7 @@ def list_by_group( | NotGiven = NOT_GIVEN, remediation_ids: List[str] | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
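The new `has_tool_calls` and `tool_call_names` filters added to the query-log listing endpoints above can be exercised as in the following sketch. The filter arguments are taken from the signatures in this diff; the client constructor, project ID, and tool name are assumptions for illustration only.

    from codex import Codex

    # Assumed constructor and auth argument; check the SDK README for the
    # actual way to authenticate.
    client = Codex(api_key="YOUR_API_KEY")

    project_id = "proj_123"  # hypothetical project ID

    # Return only query logs whose assistant response made tool calls, limited
    # to a specific tool name, newest first.
    logs = client.projects.query_logs.list(
        project_id,
        has_tool_calls=True,
        tool_call_names=["search_web"],  # hypothetical tool name
        sort="created_at",
        order="desc",
        limit=20,
    )
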
@@ -218,6 +228,8 @@ def list_by_group( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + needs_review: Filter logs that need review passed_evals: Filter by evals that passed @@ -226,6 +238,8 @@ def list_by_group( remediation_ids: List of groups to list child logs for + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -252,6 +266,7 @@ def list_by_group( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, "offset": offset, @@ -260,6 +275,7 @@ def list_by_group( "primary_eval_issue": primary_eval_issue, "remediation_ids": remediation_ids, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -277,6 +293,7 @@ def list_groups( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, @@ -288,6 +305,7 @@ def list_groups( | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
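A companion sketch for the grouped views touched by the hunks above: `list_groups` accepts the same new tool-call filters, and `list_by_group` drills into specific groups via `remediation_ids`. Setup mirrors the previous sketch; the IDs and tool name are hypothetical placeholders.

    from codex import Codex

    client = Codex(api_key="YOUR_API_KEY")  # assumed constructor, as above
    project_id = "proj_123"                 # hypothetical project ID

    # Group-level listing with the new filters.
    groups = client.projects.query_logs.list_groups(
        project_id,
        has_tool_calls=True,
        needs_review=True,
        sort="impact_score",
    )

    # Child logs for particular groups; remediation_ids come from the
    # list_groups response (the ID below is a hypothetical placeholder).
    child_logs = client.projects.query_logs.list_by_group(
        project_id,
        remediation_ids=["rem_abc123"],
        tool_call_names=["search_web"],
    )
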
@@ -310,12 +328,16 @@ def list_groups( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + needs_review: Filter log groups that need review passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -343,6 +365,7 @@ def list_groups( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, "offset": offset, @@ -350,6 +373,7 @@ def list_groups( "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, @@ -460,6 +484,7 @@ def list( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, @@ -469,6 +494,7 @@ def list( ] | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -491,10 +517,14 @@ def list( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -522,12 +552,14 @@ def list( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "offset": offset, "order": order, "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -545,6 +577,7 @@ async def list_by_group( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, @@ -556,6 +589,7 @@ async def list_by_group( | NotGiven = NOT_GIVEN, remediation_ids: List[str] | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -578,6 +612,8 @@ async def list_by_group( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + needs_review: Filter logs that need review passed_evals: Filter by evals that passed @@ -586,6 +622,8 @@ async def list_by_group( remediation_ids: List of groups to list child logs for + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -612,6 +650,7 @@ async def list_by_group( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, "offset": offset, @@ -620,6 +659,7 @@ async def list_by_group( "primary_eval_issue": primary_eval_issue, "remediation_ids": remediation_ids, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -637,6 +677,7 @@ def list_groups( custom_metadata: Optional[str] | NotGiven = NOT_GIVEN, failed_evals: Optional[List[str]] | NotGiven = NOT_GIVEN, guardrailed: Optional[bool] | NotGiven = NOT_GIVEN, + has_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, needs_review: Optional[bool] | NotGiven = NOT_GIVEN, offset: int | NotGiven = NOT_GIVEN, @@ -648,6 +689,7 @@ def list_groups( | NotGiven = NOT_GIVEN, sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]] | NotGiven = NOT_GIVEN, + tool_call_names: Optional[List[str]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -670,12 +712,16 @@ def list_groups( guardrailed: Filter by guardrailed status + has_tool_calls: Filter by whether the query log has tool calls + needs_review: Filter log groups that need review passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) + tool_call_names: Filter by names of tools called in the assistant response + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -703,6 +749,7 @@ def list_groups( "custom_metadata": custom_metadata, "failed_evals": failed_evals, "guardrailed": guardrailed, + "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, "offset": offset, @@ -710,6 +757,7 @@ def list_groups( "passed_evals": passed_evals, "primary_eval_issue": primary_eval_issue, "sort": sort, + "tool_call_names": tool_call_names, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py index c6064ed6..5d66ec04 100644 --- a/src/codex/resources/tlm.py +++ b/src/codex/resources/tlm.py @@ -94,60 +94,65 @@ def prompt( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. 
- Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. 
- When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. 
- For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. @@ -232,60 +237,65 @@ def score( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. 
+ Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. 
+ num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. 
- use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. @@ -386,60 +396,65 @@ async def prompt( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. 
+ - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. 
Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. @@ -524,60 +539,65 @@ async def score( `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). 
- Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. 
+ + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. - - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. 
- The expected input format is a list of dictionaries, where each dictionary has the following keys:
- - name: Name of the evaluation criteria.
- - criteria: Instructions specifying the evaluation criteria.
+ num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`.
+ `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one.
+ You can auto-improve responses by increasing this parameter, but at higher runtimes/costs.
+ This parameter must be between 1 and 20. It has no effect on `TLM.score()`.
+ When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it.
+ This parameter has no effect when `disable_trustworthiness` is True.
- use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead.
+ disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses.
+ This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response.
+ The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`.
quality_preset: The quality preset to use for the TLM or Trustworthy RAG API.
diff --git a/src/codex/types/project_invite_sme_params.py b/src/codex/types/project_invite_sme_params.py
index f2694632..974ef7c3 100644
--- a/src/codex/types/project_invite_sme_params.py
+++ b/src/codex/types/project_invite_sme_params.py
@@ -10,6 +10,6 @@ class ProjectInviteSmeParams(TypedDict, total=False):
email: Required[str]
- page_type: Required[Literal["query_log", "remediation"]]
+ page_type: Required[Literal["query_log", "remediation", "prioritized_issue"]]
url_query_string: Required[str]
diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py
index 62313671..719ad3d3 100644
--- a/src/codex/types/project_validate_params.py
+++ b/src/codex/types/project_validate_params.py
@@ -66,7 +66,7 @@ class ProjectValidateParams(TypedDict, total=False):
response: Required[Response]
- use_llm_matching: bool
+ use_llm_matching: Optional[bool]
constrain_outputs: Optional[List[str]]
@@ -123,60 +123,65 @@ class ProjectValidateParams(TypedDict, total=False):
`model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified.
- Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3",
- "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4",
- "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0",
- "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet",
- "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"},
- default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield
- better results, faster models yield faster results).
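The `project_invite_sme_params.py` hunk above adds "prioritized_issue" as a third accepted `page_type`. A hedged sketch of sending that request follows; the `client.projects.invite_sme(...)` method name and the `project_id` argument are assumptions based on the module name, and only the three request fields come from the diff.

```python
# Hedged sketch: `client.projects.invite_sme` and `project_id` are assumed names;
# email, page_type and url_query_string are the required fields shown in the hunk above.
from codex import Codex

client = Codex()

client.projects.invite_sme(
    project_id="00000000-0000-0000-0000-000000000000",  # hypothetical project UUID
    email="sme@example.com",
    page_type="prioritized_issue",    # newly allowed literal value
    url_query_string="issue_id=123",  # hypothetical deep-link query string
)
```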
- - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. 
+ This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. - - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. 
+ You can auto-improve responses by increasing this parameter, but at higher runtimes/costs.
+ This parameter must be between 1 and 20. It has no effect on `TLM.score()`.
+ When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it.
+ This parameter has no effect when `disable_trustworthiness` is True.
- use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead.
+ disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses.
+ This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response.
+ The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`.
"""
prompt: Optional[str]
@@ -647,6 +652,10 @@ class MessageChatCompletionDeveloperMessageParam(TypedDict, total=False):
class Options(TypedDict, total=False):
custom_eval_criteria: Iterable[object]
+ disable_persistence: bool
+
+ disable_trustworthiness: bool
+
log: List[str]
max_tokens: int
diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py
index 44883119..003b676c 100644
--- a/src/codex/types/project_validate_response.py
+++ b/src/codex/types/project_validate_response.py
@@ -59,6 +59,9 @@ class ProjectValidateResponse(BaseModel):
to answer, if it does not already exist.
"""
+ log_id: str
+ """The UUID of the query log entry created for this validation request."""
+
should_guardrail: bool
"""
True if the response should be guardrailed by the AI system, False if the
diff --git a/src/codex/types/projects/query_log_list_by_group_params.py b/src/codex/types/projects/query_log_list_by_group_params.py
index 90bd3867..0fbb2804 100644
--- a/src/codex/types/projects/query_log_list_by_group_params.py
+++ b/src/codex/types/projects/query_log_list_by_group_params.py
@@ -27,6 +27,9 @@ class QueryLogListByGroupParams(TypedDict, total=False):
guardrailed: Optional[bool]
"""Filter by guardrailed status"""
+ has_tool_calls: Optional[bool]
+ """Filter by whether the query log has tool calls"""
+
limit: int
needs_review: Optional[bool]
@@ -49,5 +52,8 @@ class QueryLogListByGroupParams(TypedDict, total=False):
sort: Optional[Literal["created_at", "primary_eval_issue_score"]]
+ tool_call_names: Optional[List[str]]
+ """Filter by names of tools called in the assistant response"""
+
was_cache_hit: Optional[bool]
"""Filter by cache hit status"""
diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py
index b3c774ba..fc33cdeb 100644
--- a/src/codex/types/projects/query_log_list_by_group_response.py
+++ b/src/codex/types/projects/query_log_list_by_group_response.py
@@ -358,6 +358,8 @@ class QueryLogsByGroupQueryLog(BaseModel):
remediation_status: Literal["ACTIVE", "DRAFT", "ACTIVE_WITH_DRAFT", "NOT_STARTED", "PAUSED", "NO_ACTION_NEEDED"]
+ tool_call_names: Optional[List[str]] = None
+
was_cache_hit: Optional[bool] = None
"""If similar query already answered, or None if cache was not checked"""
@@ -438,5 +440,9 @@ class QueryLogsByGroup(BaseModel):
class QueryLogListByGroupResponse(BaseModel):
custom_metadata_columns: List[str]
+ """Columns of the custom metadata"""
query_logs_by_group: Dict[str, QueryLogsByGroup]
+
+ tool_names: Optional[List[str]] = None
+ """Names of the tools available in queries"""
diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py
index f75ee299..6adefdf5 100644
--- a/src/codex/types/projects/query_log_list_groups_params.py
+++ b/src/codex/types/projects/query_log_list_groups_params.py
@@ -27,6 +27,9 @@ class QueryLogListGroupsParams(TypedDict, total=False):
guardrailed: Optional[bool]
"""Filter by guardrailed status"""
+ has_tool_calls: Optional[bool]
+ """Filter by whether the query log has tool calls"""
+
limit: int
needs_review: Optional[bool]
@@ -46,5 +49,8 @@ class QueryLogListGroupsParams(TypedDict, total=False):
sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]]
+ tool_call_names: Optional[List[str]]
+ """Filter by names of tools called in the assistant response"""
+
was_cache_hit: Optional[bool]
"""Filter by cache hit status"""
diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py
index 6ed4d146..7b2d44c9 100644
--- a/src/codex/types/projects/query_log_list_groups_response.py
+++ b/src/codex/types/projects/query_log_list_groups_response.py
@@ -314,6 +314,9 @@ class Tool(BaseModel):
class QueryLogListGroupsResponse(BaseModel):
id: str
+ any_escalated: bool
+ """Whether any query log in the group was escalated"""
+
created_at: datetime
formatted_escalation_eval_scores: Optional[Dict[str, FormattedEscalationEvalScores]] = None
@@ -346,6 +349,8 @@ class QueryLogListGroupsResponse(BaseModel):
remediation_status: Literal["ACTIVE", "DRAFT", "ACTIVE_WITH_DRAFT", "NOT_STARTED", "PAUSED", "NO_ACTION_NEEDED"]
+ tool_call_names: Optional[List[str]] = None
+
total_count: int
was_cache_hit: Optional[bool] = None
diff --git a/src/codex/types/projects/query_log_list_params.py b/src/codex/types/projects/query_log_list_params.py
index 5892d3c9..02c1707b 100644
--- a/src/codex/types/projects/query_log_list_params.py
+++ b/src/codex/types/projects/query_log_list_params.py
@@ -27,6 +27,9 @@ class QueryLogListParams(TypedDict, total=False):
guardrailed: Optional[bool]
"""Filter by guardrailed status"""
+ has_tool_calls: Optional[bool]
+ """Filter by whether the query log has tool calls"""
+
limit: int
offset: int
@@ -43,5 +46,8 @@ class QueryLogListParams(TypedDict, total=False):
sort: Optional[Literal["created_at", "primary_eval_issue_score"]]
+ tool_call_names: Optional[List[str]]
+ """Filter by names of tools called in the assistant response"""
+
was_cache_hit: Optional[bool]
"""Filter by cache hit status"""
diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py
index c6737b2f..b56d43d3 100644
--- a/src/codex/types/projects/query_log_list_response.py
+++ b/src/codex/types/projects/query_log_list_response.py
@@ -342,6 +342,8 @@ class QueryLogListResponse(BaseModel):
remediation_id: str
+ tool_call_names: Optional[List[str]] = None
+
was_cache_hit: Optional[bool] = None
"""If similar query already answered, or None if cache was not checked"""
diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py
index 8fd8662e..b9be8d6d 100644
--- a/src/codex/types/projects/query_log_retrieve_response.py
+++ b/src/codex/types/projects/query_log_retrieve_response.py
@@ -346,6 +346,8 @@ class QueryLogRetrieveResponse(BaseModel):
Literal["ACTIVE", "DRAFT", "ACTIVE_WITH_DRAFT", "NOT_STARTED", "PAUSED", "NO_ACTION_NEEDED"] + tool_call_names: Optional[List[str]] = None + was_cache_hit: Optional[bool] = None """If similar query already answered, or None if cache was not checked""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 567a0869..ed764766 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -349,6 +349,8 @@ class QueryLog(BaseModel): remediation_id: str + tool_call_names: Optional[List[str]] = None + was_cache_hit: Optional[bool] = None """If similar query already answered, or None if cache was not checked""" diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py index 8749c5ac..821c3811 100644 --- a/src/codex/types/tlm_prompt_params.py +++ b/src/codex/types/tlm_prompt_params.py @@ -45,60 +45,65 @@ class TlmPromptParams(TypedDict, total=False): `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. 
+ For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. 
+ This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. - - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. 
""" quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -110,6 +115,10 @@ class TlmPromptParams(TypedDict, total=False): class Options(TypedDict, total=False): custom_eval_criteria: Iterable[object] + disable_persistence: bool + + disable_trustworthiness: bool + log: List[str] max_tokens: int diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py index 4a0a32ad..d676a1d6 100644 --- a/src/codex/types/tlm_score_params.py +++ b/src/codex/types/tlm_score_params.py @@ -47,60 +47,65 @@ class TlmScoreParams(TypedDict, total=False): `model`, and `max_tokens` is set to 512. You can set custom values for these arguments regardless of the quality preset specified. - Args: model ({"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", - "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", - "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", - "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", - "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, - default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield - better results, faster models yield faster results). - Models still in beta: - "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", - "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", - "claude-3.5-haiku". - Recommended models for accuracy: "gpt-4.1", "o4-mini", - "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models for low - latency/costs: "gpt-4.1-nano", "nova-micro". - - max_tokens (int, default = 512): the maximum number of tokens that can be generated in the TLM response (and in internal trustworthiness scoring). - Higher values here may produce better (more reliable) TLM responses and trustworthiness scores, but at higher runtimes/costs. - If you experience token/rate limit errors while using TLM, try lowering this number. + Args: model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", + "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", + "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", + "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", + "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", + "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use + (better models yield better results, faster models yield faster results). - + Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", + "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", + "claude-3.5-haiku". - Recommended models for accuracy: "gpt-5", "gpt-4.1", + "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". - Recommended models + for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. 
+ + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. - num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. - `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. - This parameter must be between 1 and 20. It has no effect on `TLM.score()`. - Higher values here can produce more accurate responses from `TLM.prompt()`, but at higher runtimes/costs. - When it is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. - num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trustworthiness scoring. - Must be between 0 and 20. Higher values produce more reliable TLM trustworthiness scores, but at higher runtimes/costs. + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. Measuring consistency helps quantify the epistemic uncertainty associated with strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. 
Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), - and "string" (based on character/word overlap). Set this to "string" for minimal runtimes/costs. - - reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) - when generating alternative possible responses and reflecting on responses during trustworthiness scoring. - Higher reasoning efforts may yield more reliable TLM trustworthiness scores. Reduce this value to reduce runtimes/costs. + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. - log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. - For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. - - custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. - The expected input format is a list of dictionaries, where each dictionary has the following keys: - - name: Name of the evaluation criteria. - - criteria: Instructions specifying the evaluation criteria. + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + disable_trustworthiness (bool, default = False): if True, trustworthiness scoring is disabled and TLM will not compute trust scores for responses. + This is useful when you only want to use custom evaluation criteria or when you want to minimize computational overhead and only need the base LLM response. + The following parameters will be ignored when `disable_trustworthiness` is True: `num_consistency_samples`, `num_self_reflections`, `num_candidate_responses`, `reasoning_effort`, `similarity_measure`. 
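For the opposite trade-off, the docstring above also spells out the low-latency knobs: a cheaper `similarity_measure`, lower `reasoning_effort`, fewer consistency samples, and the "gpt-4.1-nano"/"nova-micro" models. Below is a sketch of such an options dict, assuming these documented knobs are accepted as `options` keys; only `max_tokens`, `log`, `custom_eval_criteria`, `disable_persistence` and `disable_trustworthiness` are visible in the `Options` TypedDict in this diff.

```python
# Sketch of a low-latency TLM configuration based on the docstring guidance above;
# whether every key below is accepted by the Options TypedDict is an assumption.
low_latency_options = {
    "model": "gpt-4.1-nano",         # recommended for low latency/costs
    "reasoning_effort": "none",      # spend no extra thinking tokens
    "similarity_measure": "string",  # cheapest consistency measure
    "num_consistency_samples": 4,    # fewer samples, faster trust scoring
    "max_tokens": 128,
}
```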
""" quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -112,6 +117,10 @@ class TlmScoreParams(TypedDict, total=False): class Options(TypedDict, total=False): custom_eval_criteria: Iterable[object] + disable_persistence: bool + + disable_trustworthiness: bool + log: List[str] max_tokens: int diff --git a/tests/api_resources/organizations/billing/test_card_details.py b/tests/api_resources/organizations/billing/test_card_details.py index 3a034833..e4468456 100644 --- a/tests/api_resources/organizations/billing/test_card_details.py +++ b/tests/api_resources/organizations/billing/test_card_details.py @@ -17,7 +17,7 @@ class TestCardDetails: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: card_detail = client.organizations.billing.card_details.retrieve( @@ -25,7 +25,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(Optional[OrganizationBillingCardDetails], card_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.organizations.billing.card_details.with_raw_response.retrieve( @@ -37,7 +37,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: card_detail = response.parse() assert_matches_type(Optional[OrganizationBillingCardDetails], card_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.organizations.billing.card_details.with_streaming_response.retrieve( @@ -51,7 +51,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -65,7 +65,7 @@ class TestAsyncCardDetails: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: card_detail = await async_client.organizations.billing.card_details.retrieve( @@ -73,7 +73,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(Optional[OrganizationBillingCardDetails], card_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.billing.card_details.with_raw_response.retrieve( @@ -85,7 +85,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: card_detail = await response.parse() assert_matches_type(Optional[OrganizationBillingCardDetails], card_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.organizations.billing.card_details.with_streaming_response.retrieve( @@ -99,7 
+99,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): diff --git a/tests/api_resources/organizations/billing/test_plan_details.py b/tests/api_resources/organizations/billing/test_plan_details.py index 76d9732e..1e3c36fb 100644 --- a/tests/api_resources/organizations/billing/test_plan_details.py +++ b/tests/api_resources/organizations/billing/test_plan_details.py @@ -17,7 +17,7 @@ class TestPlanDetails: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: plan_detail = client.organizations.billing.plan_details.retrieve( @@ -25,7 +25,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(OrganizationBillingPlanDetails, plan_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.organizations.billing.plan_details.with_raw_response.retrieve( @@ -37,7 +37,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: plan_detail = response.parse() assert_matches_type(OrganizationBillingPlanDetails, plan_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.organizations.billing.plan_details.with_streaming_response.retrieve( @@ -51,7 +51,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -65,7 +65,7 @@ class TestAsyncPlanDetails: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: plan_detail = await async_client.organizations.billing.plan_details.retrieve( @@ -73,7 +73,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationBillingPlanDetails, plan_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.billing.plan_details.with_raw_response.retrieve( @@ -85,7 +85,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: plan_detail = await response.parse() assert_matches_type(OrganizationBillingPlanDetails, plan_detail, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with 
async_client.organizations.billing.plan_details.with_streaming_response.retrieve( @@ -99,7 +99,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): diff --git a/tests/api_resources/organizations/billing/test_setup_intent.py b/tests/api_resources/organizations/billing/test_setup_intent.py index 49d80b0d..edc3372b 100644 --- a/tests/api_resources/organizations/billing/test_setup_intent.py +++ b/tests/api_resources/organizations/billing/test_setup_intent.py @@ -17,7 +17,7 @@ class TestSetupIntent: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create(self, client: Codex) -> None: setup_intent = client.organizations.billing.setup_intent.create( @@ -25,7 +25,7 @@ def test_method_create(self, client: Codex) -> None: ) assert_matches_type(OrganizationBillingSetupIntent, setup_intent, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_create(self, client: Codex) -> None: response = client.organizations.billing.setup_intent.with_raw_response.create( @@ -37,7 +37,7 @@ def test_raw_response_create(self, client: Codex) -> None: setup_intent = response.parse() assert_matches_type(OrganizationBillingSetupIntent, setup_intent, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_create(self, client: Codex) -> None: with client.organizations.billing.setup_intent.with_streaming_response.create( @@ -51,7 +51,7 @@ def test_streaming_response_create(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_create(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -65,7 +65,7 @@ class TestAsyncSetupIntent: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create(self, async_client: AsyncCodex) -> None: setup_intent = await async_client.organizations.billing.setup_intent.create( @@ -73,7 +73,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationBillingSetupIntent, setup_intent, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.billing.setup_intent.with_raw_response.create( @@ -85,7 +85,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: setup_intent = await response.parse() assert_matches_type(OrganizationBillingSetupIntent, setup_intent, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_create(self, async_client: 
AsyncCodex) -> None: async with async_client.organizations.billing.setup_intent.with_streaming_response.create( @@ -99,7 +99,7 @@ async def test_streaming_response_create(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_create(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): diff --git a/tests/api_resources/organizations/test_billing.py b/tests/api_resources/organizations/test_billing.py index 237562b5..f13fa304 100644 --- a/tests/api_resources/organizations/test_billing.py +++ b/tests/api_resources/organizations/test_billing.py @@ -17,7 +17,7 @@ class TestBilling: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_invoices(self, client: Codex) -> None: billing = client.organizations.billing.invoices( @@ -25,7 +25,7 @@ def test_method_invoices(self, client: Codex) -> None: ) assert_matches_type(OrganizationBillingInvoicesSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_invoices(self, client: Codex) -> None: response = client.organizations.billing.with_raw_response.invoices( @@ -37,7 +37,7 @@ def test_raw_response_invoices(self, client: Codex) -> None: billing = response.parse() assert_matches_type(OrganizationBillingInvoicesSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_invoices(self, client: Codex) -> None: with client.organizations.billing.with_streaming_response.invoices( @@ -51,7 +51,7 @@ def test_streaming_response_invoices(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_invoices(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -59,7 +59,7 @@ def test_path_params_invoices(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_usage(self, client: Codex) -> None: billing = client.organizations.billing.usage( @@ -67,7 +67,7 @@ def test_method_usage(self, client: Codex) -> None: ) assert_matches_type(OrganizationBillingUsageSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_usage(self, client: Codex) -> None: response = client.organizations.billing.with_raw_response.usage( @@ -79,7 +79,7 @@ def test_raw_response_usage(self, client: Codex) -> None: billing = response.parse() assert_matches_type(OrganizationBillingUsageSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_usage(self, client: Codex) -> None: with client.organizations.billing.with_streaming_response.usage( @@ -93,7 +93,7 @@ def test_streaming_response_usage(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize 
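# Illustrative only: the repeated change in these test files adds a human-readable reason to the
# existing skip marker, so `pytest -rs` reports why each test was skipped. A hypothetical way to
# centralize the string (not what the generated tests do) would be a shared marker object:
import pytest

skip_prism = pytest.mark.skip(reason="Prism tests are disabled")

@skip_prism
def test_example() -> None:  # placeholder test, not from the diff
    ...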
def test_path_params_usage(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -107,7 +107,7 @@ class TestAsyncBilling: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_invoices(self, async_client: AsyncCodex) -> None: billing = await async_client.organizations.billing.invoices( @@ -115,7 +115,7 @@ async def test_method_invoices(self, async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationBillingInvoicesSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_invoices(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.billing.with_raw_response.invoices( @@ -127,7 +127,7 @@ async def test_raw_response_invoices(self, async_client: AsyncCodex) -> None: billing = await response.parse() assert_matches_type(OrganizationBillingInvoicesSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_invoices(self, async_client: AsyncCodex) -> None: async with async_client.organizations.billing.with_streaming_response.invoices( @@ -141,7 +141,7 @@ async def test_streaming_response_invoices(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_invoices(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -149,7 +149,7 @@ async def test_path_params_invoices(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_usage(self, async_client: AsyncCodex) -> None: billing = await async_client.organizations.billing.usage( @@ -157,7 +157,7 @@ async def test_method_usage(self, async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationBillingUsageSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_usage(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.billing.with_raw_response.usage( @@ -169,7 +169,7 @@ async def test_raw_response_usage(self, async_client: AsyncCodex) -> None: billing = await response.parse() assert_matches_type(OrganizationBillingUsageSchema, billing, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_usage(self, async_client: AsyncCodex) -> None: async with async_client.organizations.billing.with_streaming_response.usage( @@ -183,7 +183,7 @@ async def test_streaming_response_usage(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_usage(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): diff --git a/tests/api_resources/projects/test_access_keys.py 
b/tests/api_resources/projects/test_access_keys.py index c3bc1785..13fc60ca 100644 --- a/tests/api_resources/projects/test_access_keys.py +++ b/tests/api_resources/projects/test_access_keys.py @@ -22,7 +22,7 @@ class TestAccessKeys: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create(self, client: Codex) -> None: access_key = client.projects.access_keys.create( @@ -31,7 +31,7 @@ def test_method_create(self, client: Codex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create_with_all_params(self, client: Codex) -> None: access_key = client.projects.access_keys.create( @@ -46,7 +46,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_create(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.create( @@ -59,7 +59,7 @@ def test_raw_response_create(self, client: Codex) -> None: access_key = response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_create(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.create( @@ -74,7 +74,7 @@ def test_streaming_response_create(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_create(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -83,7 +83,7 @@ def test_path_params_create(self, client: Codex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: access_key = client.projects.access_keys.retrieve( @@ -92,7 +92,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.retrieve( @@ -105,7 +105,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: access_key = response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.retrieve( @@ -120,7 +120,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -135,7 +135,7 @@ def test_path_params_retrieve(self, client: Codex) -> None: 
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update(self, client: Codex) -> None: access_key = client.projects.access_keys.update( @@ -145,7 +145,7 @@ def test_method_update(self, client: Codex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_with_all_params(self, client: Codex) -> None: access_key = client.projects.access_keys.update( @@ -157,7 +157,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_update(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.update( @@ -171,7 +171,7 @@ def test_raw_response_update(self, client: Codex) -> None: access_key = response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_update(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.update( @@ -187,7 +187,7 @@ def test_streaming_response_update(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_update(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -204,7 +204,7 @@ def test_path_params_update(self, client: Codex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> None: access_key = client.projects.access_keys.list( @@ -212,7 +212,7 @@ def test_method_list(self, client: Codex) -> None: ) assert_matches_type(AccessKeyListResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.list( @@ -224,7 +224,7 @@ def test_raw_response_list(self, client: Codex) -> None: access_key = response.parse() assert_matches_type(AccessKeyListResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.list( @@ -238,7 +238,7 @@ def test_streaming_response_list(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -246,7 +246,7 @@ def test_path_params_list(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_delete(self, client: Codex) -> None: access_key = client.projects.access_keys.delete( @@ -255,7 +255,7 @@ def test_method_delete(self, client: Codex) -> None: ) assert access_key is None - @pytest.mark.skip() 
+ @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_delete(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.delete( @@ -268,7 +268,7 @@ def test_raw_response_delete(self, client: Codex) -> None: access_key = response.parse() assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_delete(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.delete( @@ -283,7 +283,7 @@ def test_streaming_response_delete(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_delete(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -298,13 +298,13 @@ def test_path_params_delete(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve_project_id(self, client: Codex) -> None: access_key = client.projects.access_keys.retrieve_project_id() assert_matches_type(AccessKeyRetrieveProjectIDResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve_project_id(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.retrieve_project_id() @@ -314,7 +314,7 @@ def test_raw_response_retrieve_project_id(self, client: Codex) -> None: access_key = response.parse() assert_matches_type(AccessKeyRetrieveProjectIDResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve_project_id(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.retrieve_project_id() as response: @@ -326,7 +326,7 @@ def test_streaming_response_retrieve_project_id(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_revoke(self, client: Codex) -> None: access_key = client.projects.access_keys.revoke( @@ -335,7 +335,7 @@ def test_method_revoke(self, client: Codex) -> None: ) assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_revoke(self, client: Codex) -> None: response = client.projects.access_keys.with_raw_response.revoke( @@ -348,7 +348,7 @@ def test_raw_response_revoke(self, client: Codex) -> None: access_key = response.parse() assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_revoke(self, client: Codex) -> None: with client.projects.access_keys.with_streaming_response.revoke( @@ -363,7 +363,7 @@ def test_streaming_response_revoke(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_revoke(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -384,7 +384,7 @@ class TestAsyncAccessKeys: "async_client", [False, True, {"http_client": 
"aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.create( @@ -393,7 +393,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.create( @@ -408,7 +408,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.create( @@ -421,7 +421,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_create(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.create( @@ -436,7 +436,7 @@ async def test_streaming_response_create(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_create(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -445,7 +445,7 @@ async def test_path_params_create(self, async_client: AsyncCodex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.retrieve( @@ -454,7 +454,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.retrieve( @@ -467,7 +467,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.retrieve( @@ -482,7 +482,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` 
but received ''"): @@ -497,7 +497,7 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.update( @@ -507,7 +507,7 @@ async def test_method_update(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.update( @@ -519,7 +519,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_update(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.update( @@ -533,7 +533,7 @@ async def test_raw_response_update(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert_matches_type(AccessKeySchema, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_update(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.update( @@ -549,7 +549,7 @@ async def test_streaming_response_update(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_update(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -566,7 +566,7 @@ async def test_path_params_update(self, async_client: AsyncCodex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.list( @@ -574,7 +574,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AccessKeyListResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.list( @@ -586,7 +586,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert_matches_type(AccessKeyListResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.list( @@ -600,7 +600,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list(self, async_client: AsyncCodex) -> None: with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -608,7 +608,7 @@ async def test_path_params_list(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_delete(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.delete( @@ -617,7 +617,7 @@ async def test_method_delete(self, async_client: AsyncCodex) -> None: ) assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.delete( @@ -630,7 +630,7 @@ async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.delete( @@ -645,7 +645,7 @@ async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_delete(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -660,13 +660,13 @@ async def test_path_params_delete(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve_project_id(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.retrieve_project_id() assert_matches_type(AccessKeyRetrieveProjectIDResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve_project_id(self, async_client: AsyncCodex) -> None: response = await async_client.projects.access_keys.with_raw_response.retrieve_project_id() @@ -676,7 +676,7 @@ async def test_raw_response_retrieve_project_id(self, async_client: AsyncCodex) access_key = await response.parse() assert_matches_type(AccessKeyRetrieveProjectIDResponse, access_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve_project_id(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.retrieve_project_id() as response: @@ -688,7 +688,7 @@ async def test_streaming_response_retrieve_project_id(self, async_client: AsyncC assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_revoke(self, async_client: AsyncCodex) -> None: access_key = await async_client.projects.access_keys.revoke( @@ -697,7 +697,7 @@ async def test_method_revoke(self, async_client: AsyncCodex) -> None: ) assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_revoke(self, async_client: AsyncCodex) -> None: response = await 
async_client.projects.access_keys.with_raw_response.revoke( @@ -710,7 +710,7 @@ async def test_raw_response_revoke(self, async_client: AsyncCodex) -> None: access_key = await response.parse() assert access_key is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_revoke(self, async_client: AsyncCodex) -> None: async with async_client.projects.access_keys.with_streaming_response.revoke( @@ -725,7 +725,7 @@ async def test_streaming_response_revoke(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_revoke(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): diff --git a/tests/api_resources/projects/test_evals.py b/tests/api_resources/projects/test_evals.py index f36de276..7266751d 100644 --- a/tests/api_resources/projects/test_evals.py +++ b/tests/api_resources/projects/test_evals.py @@ -18,7 +18,7 @@ class TestEvals: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create(self, client: Codex) -> None: eval = client.projects.evals.create( @@ -29,7 +29,7 @@ def test_method_create(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create_with_all_params(self, client: Codex) -> None: eval = client.projects.evals.create( @@ -50,7 +50,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_create(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.create( @@ -65,7 +65,7 @@ def test_raw_response_create(self, client: Codex) -> None: eval = response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_create(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.create( @@ -82,7 +82,7 @@ def test_streaming_response_create(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_create(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -93,7 +93,7 @@ def test_path_params_create(self, client: Codex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_overload_1(self, client: Codex) -> None: eval = client.projects.evals.update( @@ -105,7 +105,7 @@ def test_method_update_overload_1(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_with_all_params_overload_1(self, client: Codex) -> None: eval = client.projects.evals.update( @@ -127,7 +127,7 @@ def 
test_method_update_with_all_params_overload_1(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_update_overload_1(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.update( @@ -143,7 +143,7 @@ def test_raw_response_update_overload_1(self, client: Codex) -> None: eval = response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_update_overload_1(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.update( @@ -161,7 +161,7 @@ def test_streaming_response_update_overload_1(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_update_overload_1(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -182,7 +182,7 @@ def test_path_params_update_overload_1(self, client: Codex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_overload_2(self, client: Codex) -> None: eval = client.projects.evals.update( @@ -192,7 +192,7 @@ def test_method_update_overload_2(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_with_all_params_overload_2(self, client: Codex) -> None: eval = client.projects.evals.update( @@ -208,7 +208,7 @@ def test_method_update_with_all_params_overload_2(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_update_overload_2(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.update( @@ -222,7 +222,7 @@ def test_raw_response_update_overload_2(self, client: Codex) -> None: eval = response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_update_overload_2(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.update( @@ -238,7 +238,7 @@ def test_streaming_response_update_overload_2(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_update_overload_2(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -255,7 +255,7 @@ def test_path_params_update_overload_2(self, client: Codex) -> None: body_eval_key="eval_key", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> None: eval = client.projects.evals.list( @@ -263,7 +263,7 @@ def test_method_list(self, client: Codex) -> None: ) assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def 
test_method_list_with_all_params(self, client: Codex) -> None: eval = client.projects.evals.list( @@ -274,7 +274,7 @@ def test_method_list_with_all_params(self, client: Codex) -> None: ) assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.list( @@ -286,7 +286,7 @@ def test_raw_response_list(self, client: Codex) -> None: eval = response.parse() assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.list( @@ -300,7 +300,7 @@ def test_streaming_response_list(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -308,7 +308,7 @@ def test_path_params_list(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_delete(self, client: Codex) -> None: eval = client.projects.evals.delete( @@ -317,7 +317,7 @@ def test_method_delete(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_delete(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.delete( @@ -330,7 +330,7 @@ def test_raw_response_delete(self, client: Codex) -> None: eval = response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_delete(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.delete( @@ -345,7 +345,7 @@ def test_streaming_response_delete(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_delete(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -366,7 +366,7 @@ class TestAsyncEvals: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.create( @@ -377,7 +377,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.create( @@ -398,7 +398,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + 
@pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.create( @@ -413,7 +413,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: eval = await response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_create(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.create( @@ -430,7 +430,7 @@ async def test_streaming_response_create(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_create(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -441,7 +441,7 @@ async def test_path_params_create(self, async_client: AsyncCodex) -> None: name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_overload_1(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.update( @@ -453,7 +453,7 @@ async def test_method_update_overload_1(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_with_all_params_overload_1(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.update( @@ -475,7 +475,7 @@ async def test_method_update_with_all_params_overload_1(self, async_client: Asyn ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_update_overload_1(self, async_client: AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.update( @@ -491,7 +491,7 @@ async def test_raw_response_update_overload_1(self, async_client: AsyncCodex) -> eval = await response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_update_overload_1(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.update( @@ -509,7 +509,7 @@ async def test_streaming_response_update_overload_1(self, async_client: AsyncCod assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_update_overload_1(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -530,7 +530,7 @@ async def test_path_params_update_overload_1(self, async_client: AsyncCodex) -> name="name", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_overload_2(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.update( @@ -540,7 +540,7 @@ async def test_method_update_overload_2(self, async_client: AsyncCodex) -> None: ) 
assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_with_all_params_overload_2(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.update( @@ -556,7 +556,7 @@ async def test_method_update_with_all_params_overload_2(self, async_client: Asyn ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_update_overload_2(self, async_client: AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.update( @@ -570,7 +570,7 @@ async def test_raw_response_update_overload_2(self, async_client: AsyncCodex) -> eval = await response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_update_overload_2(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.update( @@ -586,7 +586,7 @@ async def test_streaming_response_update_overload_2(self, async_client: AsyncCod assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_update_overload_2(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -603,7 +603,7 @@ async def test_path_params_update_overload_2(self, async_client: AsyncCodex) -> body_eval_key="eval_key", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.list( @@ -611,7 +611,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None: ) assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.list( @@ -622,7 +622,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No ) assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.list( @@ -634,7 +634,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: eval = await response.parse() assert_matches_type(EvalListResponse, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.list( @@ -648,7 +648,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -656,7 +656,7 
@@ async def test_path_params_list(self, async_client: AsyncCodex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_delete(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.delete( @@ -665,7 +665,7 @@ async def test_method_delete(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.delete( @@ -678,7 +678,7 @@ async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: eval = await response.parse() assert_matches_type(ProjectReturnSchema, eval, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.delete( @@ -693,7 +693,7 @@ async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_delete(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): diff --git a/tests/api_resources/projects/test_query_logs.py b/tests/api_resources/projects/test_query_logs.py index cd4cd7d2..5f7e02cd 100644 --- a/tests/api_resources/projects/test_query_logs.py +++ b/tests/api_resources/projects/test_query_logs.py @@ -30,7 +30,7 @@ class TestQueryLogs: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: query_log = client.projects.query_logs.retrieve( @@ -39,7 +39,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(QueryLogRetrieveResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.projects.query_logs.with_raw_response.retrieve( @@ -52,7 +52,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: query_log = response.parse() assert_matches_type(QueryLogRetrieveResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.projects.query_logs.with_streaming_response.retrieve( @@ -67,7 +67,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -82,7 +82,7 @@ def test_path_params_retrieve(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> 
None: query_log = client.projects.query_logs.list( @@ -90,7 +90,7 @@ def test_method_list(self, client: Codex) -> None: ) assert_matches_type(SyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_with_all_params(self, client: Codex) -> None: query_log = client.projects.query_logs.list( @@ -100,17 +100,19 @@ def test_method_list_with_all_params(self, client: Codex) -> None: custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, offset=0, order="asc", passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(SyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.query_logs.with_raw_response.list( @@ -122,7 +124,7 @@ def test_raw_response_list(self, client: Codex) -> None: query_log = response.parse() assert_matches_type(SyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.query_logs.with_streaming_response.list( @@ -136,7 +138,7 @@ def test_streaming_response_list(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -144,7 +146,7 @@ def test_path_params_list(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_by_group(self, client: Codex) -> None: query_log = client.projects.query_logs.list_by_group( @@ -152,7 +154,7 @@ def test_method_list_by_group(self, client: Codex) -> None: ) assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_by_group_with_all_params(self, client: Codex) -> None: query_log = client.projects.query_logs.list_by_group( @@ -162,6 +164,7 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, needs_review=True, offset=0, @@ -170,11 +173,12 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list_by_group(self, client: Codex) -> None: response = client.projects.query_logs.with_raw_response.list_by_group( @@ -186,7 +190,7 @@ def test_raw_response_list_by_group(self, client: Codex) -> None: query_log = response.parse() assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) 
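# Illustrative only: the new filters exercised above (`has_tool_calls`, `tool_call_names`) combine
# with the existing query-log listing call. The method path and parameter names are taken from the
# tests; the tool name below is a placeholder and the page iteration is an assumption about the
# offset-page object returned here.
from codex import Codex

client = Codex()  # credentials resolved from the environment (assumption)
logs = client.projects.query_logs.list(
    "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",  # placeholder project id reused from the tests
    has_tool_calls=True,                     # only logs whose responses include tool calls
    tool_call_names=["search_docs"],         # hypothetical tool name; the tests pass ["string"]
    limit=10,
)
for query_log in logs:
    print(query_log)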
- @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list_by_group(self, client: Codex) -> None: with client.projects.query_logs.with_streaming_response.list_by_group( @@ -200,7 +204,7 @@ def test_streaming_response_list_by_group(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list_by_group(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -208,7 +212,7 @@ def test_path_params_list_by_group(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_groups(self, client: Codex) -> None: query_log = client.projects.query_logs.list_groups( @@ -216,7 +220,7 @@ def test_method_list_groups(self, client: Codex) -> None: ) assert_matches_type(SyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_groups_with_all_params(self, client: Codex) -> None: query_log = client.projects.query_logs.list_groups( @@ -226,6 +230,7 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, needs_review=True, offset=0, @@ -233,11 +238,12 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(SyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list_groups(self, client: Codex) -> None: response = client.projects.query_logs.with_raw_response.list_groups( @@ -249,7 +255,7 @@ def test_raw_response_list_groups(self, client: Codex) -> None: query_log = response.parse() assert_matches_type(SyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list_groups(self, client: Codex) -> None: with client.projects.query_logs.with_streaming_response.list_groups( @@ -263,7 +269,7 @@ def test_streaming_response_list_groups(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list_groups(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -271,7 +277,7 @@ def test_path_params_list_groups(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_start_remediation(self, client: Codex) -> None: query_log = client.projects.query_logs.start_remediation( @@ -280,7 +286,7 @@ def test_method_start_remediation(self, client: Codex) -> None: ) assert_matches_type(QueryLogStartRemediationResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def 
test_raw_response_start_remediation(self, client: Codex) -> None: response = client.projects.query_logs.with_raw_response.start_remediation( @@ -293,7 +299,7 @@ def test_raw_response_start_remediation(self, client: Codex) -> None: query_log = response.parse() assert_matches_type(QueryLogStartRemediationResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_start_remediation(self, client: Codex) -> None: with client.projects.query_logs.with_streaming_response.start_remediation( @@ -308,7 +314,7 @@ def test_streaming_response_start_remediation(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_start_remediation(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -329,7 +335,7 @@ class TestAsyncQueryLogs: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.retrieve( @@ -338,7 +344,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(QueryLogRetrieveResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.projects.query_logs.with_raw_response.retrieve( @@ -351,7 +357,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: query_log = await response.parse() assert_matches_type(QueryLogRetrieveResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.projects.query_logs.with_streaming_response.retrieve( @@ -366,7 +372,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -381,7 +387,7 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list( @@ -389,7 +395,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AsyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list( @@ -399,17 +405,19 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No 
custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, offset=0, order="asc", passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(AsyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await async_client.projects.query_logs.with_raw_response.list( @@ -421,7 +429,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: query_log = await response.parse() assert_matches_type(AsyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.query_logs.with_streaming_response.list( @@ -435,7 +443,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -443,7 +451,7 @@ async def test_path_params_list(self, async_client: AsyncCodex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_by_group(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list_by_group( @@ -451,7 +459,7 @@ async def test_method_list_by_group(self, async_client: AsyncCodex) -> None: ) assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_by_group_with_all_params(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list_by_group( @@ -461,6 +469,7 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, needs_review=True, offset=0, @@ -469,11 +478,12 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod primary_eval_issue=["hallucination"], remediation_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list_by_group(self, async_client: AsyncCodex) -> None: response = await async_client.projects.query_logs.with_raw_response.list_by_group( @@ -485,7 +495,7 @@ async def test_raw_response_list_by_group(self, async_client: AsyncCodex) -> Non query_log = await response.parse() assert_matches_type(QueryLogListByGroupResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list_by_group(self, async_client: AsyncCodex) -> None: async with 
async_client.projects.query_logs.with_streaming_response.list_by_group( @@ -499,7 +509,7 @@ async def test_streaming_response_list_by_group(self, async_client: AsyncCodex) assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list_by_group(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -507,7 +517,7 @@ async def test_path_params_list_by_group(self, async_client: AsyncCodex) -> None project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_groups(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list_groups( @@ -515,7 +525,7 @@ async def test_method_list_groups(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AsyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.list_groups( @@ -525,6 +535,7 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex custom_metadata="custom_metadata", failed_evals=["string"], guardrailed=True, + has_tool_calls=True, limit=1, needs_review=True, offset=0, @@ -532,11 +543,12 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex passed_evals=["string"], primary_eval_issue=["hallucination"], sort="created_at", + tool_call_names=["string"], was_cache_hit=True, ) assert_matches_type(AsyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list_groups(self, async_client: AsyncCodex) -> None: response = await async_client.projects.query_logs.with_raw_response.list_groups( @@ -548,7 +560,7 @@ async def test_raw_response_list_groups(self, async_client: AsyncCodex) -> None: query_log = await response.parse() assert_matches_type(AsyncOffsetPageQueryLogGroups[QueryLogListGroupsResponse], query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list_groups(self, async_client: AsyncCodex) -> None: async with async_client.projects.query_logs.with_streaming_response.list_groups( @@ -562,7 +574,7 @@ async def test_streaming_response_list_groups(self, async_client: AsyncCodex) -> assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list_groups(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -570,7 +582,7 @@ async def test_path_params_list_groups(self, async_client: AsyncCodex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_start_remediation(self, async_client: AsyncCodex) -> None: query_log = await async_client.projects.query_logs.start_remediation( @@ -579,7 +591,7 @@ async def test_method_start_remediation(self, async_client: AsyncCodex) -> None: ) 
assert_matches_type(QueryLogStartRemediationResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_start_remediation(self, async_client: AsyncCodex) -> None: response = await async_client.projects.query_logs.with_raw_response.start_remediation( @@ -592,7 +604,7 @@ async def test_raw_response_start_remediation(self, async_client: AsyncCodex) -> query_log = await response.parse() assert_matches_type(QueryLogStartRemediationResponse, query_log, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_start_remediation(self, async_client: AsyncCodex) -> None: async with async_client.projects.query_logs.with_streaming_response.start_remediation( @@ -607,7 +619,7 @@ async def test_streaming_response_start_remediation(self, async_client: AsyncCod assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_start_remediation(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): diff --git a/tests/api_resources/projects/test_remediations.py b/tests/api_resources/projects/test_remediations.py index 5866dbe7..d547ac85 100644 --- a/tests/api_resources/projects/test_remediations.py +++ b/tests/api_resources/projects/test_remediations.py @@ -30,7 +30,7 @@ class TestRemediations: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create(self, client: Codex) -> None: remediation = client.projects.remediations.create( @@ -39,7 +39,7 @@ def test_method_create(self, client: Codex) -> None: ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create_with_all_params(self, client: Codex) -> None: remediation = client.projects.remediations.create( @@ -50,7 +50,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_create(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.create( @@ -63,7 +63,7 @@ def test_raw_response_create(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_create(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.create( @@ -78,7 +78,7 @@ def test_streaming_response_create(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_create(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -87,7 +87,7 @@ def test_path_params_create(self, client: Codex) -> None: question="x", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are 
disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: remediation = client.projects.remediations.retrieve( @@ -96,7 +96,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(RemediationRetrieveResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.retrieve( @@ -109,7 +109,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationRetrieveResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.retrieve( @@ -124,7 +124,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -139,7 +139,7 @@ def test_path_params_retrieve(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> None: remediation = client.projects.remediations.list( @@ -147,7 +147,7 @@ def test_method_list(self, client: Codex) -> None: ) assert_matches_type(SyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_with_all_params(self, client: Codex) -> None: remediation = client.projects.remediations.list( @@ -165,7 +165,7 @@ def test_method_list_with_all_params(self, client: Codex) -> None: ) assert_matches_type(SyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.list( @@ -177,7 +177,7 @@ def test_raw_response_list(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(SyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.list( @@ -191,7 +191,7 @@ def test_streaming_response_list(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -199,7 +199,7 @@ def test_path_params_list(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_delete(self, client: Codex) -> None: remediation = client.projects.remediations.delete( @@ -208,7 +208,7 @@ def 
test_method_delete(self, client: Codex) -> None: ) assert remediation is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_delete(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.delete( @@ -221,7 +221,7 @@ def test_raw_response_delete(self, client: Codex) -> None: remediation = response.parse() assert remediation is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_delete(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.delete( @@ -236,7 +236,7 @@ def test_streaming_response_delete(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_delete(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -251,7 +251,7 @@ def test_path_params_delete(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_edit_answer(self, client: Codex) -> None: remediation = client.projects.remediations.edit_answer( @@ -261,7 +261,7 @@ def test_method_edit_answer(self, client: Codex) -> None: ) assert_matches_type(RemediationEditAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_edit_answer(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.edit_answer( @@ -275,7 +275,7 @@ def test_raw_response_edit_answer(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationEditAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_edit_answer(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.edit_answer( @@ -291,7 +291,7 @@ def test_streaming_response_edit_answer(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_edit_answer(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -308,7 +308,7 @@ def test_path_params_edit_answer(self, client: Codex) -> None: answer="answer", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_edit_draft_answer(self, client: Codex) -> None: remediation = client.projects.remediations.edit_draft_answer( @@ -318,7 +318,7 @@ def test_method_edit_draft_answer(self, client: Codex) -> None: ) assert_matches_type(RemediationEditDraftAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_edit_draft_answer(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.edit_draft_answer( @@ -332,7 +332,7 @@ def test_raw_response_edit_draft_answer(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationEditDraftAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + 
@pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_edit_draft_answer(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.edit_draft_answer( @@ -348,7 +348,7 @@ def test_streaming_response_edit_draft_answer(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_edit_draft_answer(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -365,7 +365,7 @@ def test_path_params_edit_draft_answer(self, client: Codex) -> None: draft_answer="draft_answer", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_get_resolved_logs_count(self, client: Codex) -> None: remediation = client.projects.remediations.get_resolved_logs_count( @@ -374,7 +374,7 @@ def test_method_get_resolved_logs_count(self, client: Codex) -> None: ) assert_matches_type(RemediationGetResolvedLogsCountResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_get_resolved_logs_count(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.get_resolved_logs_count( @@ -387,7 +387,7 @@ def test_raw_response_get_resolved_logs_count(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationGetResolvedLogsCountResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_get_resolved_logs_count(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.get_resolved_logs_count( @@ -402,7 +402,7 @@ def test_streaming_response_get_resolved_logs_count(self, client: Codex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_get_resolved_logs_count(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -417,7 +417,7 @@ def test_path_params_get_resolved_logs_count(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_resolved_logs(self, client: Codex) -> None: remediation = client.projects.remediations.list_resolved_logs( @@ -426,7 +426,7 @@ def test_method_list_resolved_logs(self, client: Codex) -> None: ) assert_matches_type(RemediationListResolvedLogsResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list_resolved_logs(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.list_resolved_logs( @@ -439,7 +439,7 @@ def test_raw_response_list_resolved_logs(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationListResolvedLogsResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list_resolved_logs(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.list_resolved_logs( @@ -454,7 +454,7 @@ def 
test_streaming_response_list_resolved_logs(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list_resolved_logs(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -469,7 +469,7 @@ def test_path_params_list_resolved_logs(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_pause(self, client: Codex) -> None: remediation = client.projects.remediations.pause( @@ -478,7 +478,7 @@ def test_method_pause(self, client: Codex) -> None: ) assert_matches_type(RemediationPauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_pause(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.pause( @@ -491,7 +491,7 @@ def test_raw_response_pause(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationPauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_pause(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.pause( @@ -506,7 +506,7 @@ def test_streaming_response_pause(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_pause(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -521,7 +521,7 @@ def test_path_params_pause(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_publish(self, client: Codex) -> None: remediation = client.projects.remediations.publish( @@ -530,7 +530,7 @@ def test_method_publish(self, client: Codex) -> None: ) assert_matches_type(RemediationPublishResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_publish(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.publish( @@ -543,7 +543,7 @@ def test_raw_response_publish(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationPublishResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_publish(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.publish( @@ -558,7 +558,7 @@ def test_streaming_response_publish(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_publish(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -573,7 +573,7 @@ def test_path_params_publish(self, client: Codex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests 
are disabled") @parametrize def test_method_unpause(self, client: Codex) -> None: remediation = client.projects.remediations.unpause( @@ -582,7 +582,7 @@ def test_method_unpause(self, client: Codex) -> None: ) assert_matches_type(RemediationUnpauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_unpause(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.unpause( @@ -595,7 +595,7 @@ def test_raw_response_unpause(self, client: Codex) -> None: remediation = response.parse() assert_matches_type(RemediationUnpauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_unpause(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.unpause( @@ -610,7 +610,7 @@ def test_streaming_response_unpause(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_unpause(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -631,7 +631,7 @@ class TestAsyncRemediations: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.create( @@ -640,7 +640,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.create( @@ -651,7 +651,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.create( @@ -664,7 +664,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_create(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.create( @@ -679,7 +679,7 @@ async def test_streaming_response_create(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_create(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -688,7 +688,7 @@ async def test_path_params_create(self, async_client: AsyncCodex) -> None: question="x", ) - @pytest.mark.skip() + 
@pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.retrieve( @@ -697,7 +697,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationRetrieveResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.retrieve( @@ -710,7 +710,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationRetrieveResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.retrieve( @@ -725,7 +725,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -740,7 +740,7 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.list( @@ -748,7 +748,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None: ) assert_matches_type(AsyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.list( @@ -766,7 +766,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No ) assert_matches_type(AsyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.list( @@ -778,7 +778,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(AsyncOffsetPageRemediations[RemediationListResponse], remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.list( @@ -792,7 +792,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list(self, async_client: AsyncCodex) -> None: 
with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -800,7 +800,7 @@ async def test_path_params_list(self, async_client: AsyncCodex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_delete(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.delete( @@ -809,7 +809,7 @@ async def test_method_delete(self, async_client: AsyncCodex) -> None: ) assert remediation is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.delete( @@ -822,7 +822,7 @@ async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert remediation is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.delete( @@ -837,7 +837,7 @@ async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_delete(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -852,7 +852,7 @@ async def test_path_params_delete(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_edit_answer(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.edit_answer( @@ -862,7 +862,7 @@ async def test_method_edit_answer(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationEditAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_edit_answer(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.edit_answer( @@ -876,7 +876,7 @@ async def test_raw_response_edit_answer(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationEditAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_edit_answer(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.edit_answer( @@ -892,7 +892,7 @@ async def test_streaming_response_edit_answer(self, async_client: AsyncCodex) -> assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_edit_answer(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -909,7 +909,7 @@ async def test_path_params_edit_answer(self, async_client: AsyncCodex) -> None: answer="answer", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def 
test_method_edit_draft_answer(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.edit_draft_answer( @@ -919,7 +919,7 @@ async def test_method_edit_draft_answer(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationEditDraftAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_edit_draft_answer(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.edit_draft_answer( @@ -933,7 +933,7 @@ async def test_raw_response_edit_draft_answer(self, async_client: AsyncCodex) -> remediation = await response.parse() assert_matches_type(RemediationEditDraftAnswerResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_edit_draft_answer(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.edit_draft_answer( @@ -949,7 +949,7 @@ async def test_streaming_response_edit_draft_answer(self, async_client: AsyncCod assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_edit_draft_answer(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -966,7 +966,7 @@ async def test_path_params_edit_draft_answer(self, async_client: AsyncCodex) -> draft_answer="draft_answer", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_get_resolved_logs_count(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.get_resolved_logs_count( @@ -975,7 +975,7 @@ async def test_method_get_resolved_logs_count(self, async_client: AsyncCodex) -> ) assert_matches_type(RemediationGetResolvedLogsCountResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_get_resolved_logs_count(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.get_resolved_logs_count( @@ -988,7 +988,7 @@ async def test_raw_response_get_resolved_logs_count(self, async_client: AsyncCod remediation = await response.parse() assert_matches_type(RemediationGetResolvedLogsCountResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_get_resolved_logs_count(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.get_resolved_logs_count( @@ -1003,7 +1003,7 @@ async def test_streaming_response_get_resolved_logs_count(self, async_client: As assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_get_resolved_logs_count(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1018,7 +1018,7 @@ async def test_path_params_get_resolved_logs_count(self, async_client: AsyncCode project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are 
disabled") @parametrize async def test_method_list_resolved_logs(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.list_resolved_logs( @@ -1027,7 +1027,7 @@ async def test_method_list_resolved_logs(self, async_client: AsyncCodex) -> None ) assert_matches_type(RemediationListResolvedLogsResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list_resolved_logs(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.list_resolved_logs( @@ -1040,7 +1040,7 @@ async def test_raw_response_list_resolved_logs(self, async_client: AsyncCodex) - remediation = await response.parse() assert_matches_type(RemediationListResolvedLogsResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list_resolved_logs(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.list_resolved_logs( @@ -1055,7 +1055,7 @@ async def test_streaming_response_list_resolved_logs(self, async_client: AsyncCo assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list_resolved_logs(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1070,7 +1070,7 @@ async def test_path_params_list_resolved_logs(self, async_client: AsyncCodex) -> project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_pause(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.pause( @@ -1079,7 +1079,7 @@ async def test_method_pause(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationPauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_pause(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.pause( @@ -1092,7 +1092,7 @@ async def test_raw_response_pause(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationPauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_pause(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.pause( @@ -1107,7 +1107,7 @@ async def test_streaming_response_pause(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_pause(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1122,7 +1122,7 @@ async def test_path_params_pause(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_publish(self, async_client: AsyncCodex) -> None: 
remediation = await async_client.projects.remediations.publish( @@ -1131,7 +1131,7 @@ async def test_method_publish(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationPublishResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_publish(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.publish( @@ -1144,7 +1144,7 @@ async def test_raw_response_publish(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationPublishResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_publish(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.publish( @@ -1159,7 +1159,7 @@ async def test_streaming_response_publish(self, async_client: AsyncCodex) -> Non assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_publish(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1174,7 +1174,7 @@ async def test_path_params_publish(self, async_client: AsyncCodex) -> None: project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_unpause(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.unpause( @@ -1183,7 +1183,7 @@ async def test_method_unpause(self, async_client: AsyncCodex) -> None: ) assert_matches_type(RemediationUnpauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_unpause(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.unpause( @@ -1196,7 +1196,7 @@ async def test_raw_response_unpause(self, async_client: AsyncCodex) -> None: remediation = await response.parse() assert_matches_type(RemediationUnpauseResponse, remediation, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_unpause(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.unpause( @@ -1211,7 +1211,7 @@ async def test_streaming_response_unpause(self, async_client: AsyncCodex) -> Non assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_unpause(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): diff --git a/tests/api_resources/test_health.py b/tests/api_resources/test_health.py index 92db3a81..0baadf50 100644 --- a/tests/api_resources/test_health.py +++ b/tests/api_resources/test_health.py @@ -17,13 +17,13 @@ class TestHealth: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_check(self, client: Codex) -> None: health = 
client.health.check() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_check(self, client: Codex) -> None: response = client.health.with_raw_response.check() @@ -33,7 +33,7 @@ def test_raw_response_check(self, client: Codex) -> None: health = response.parse() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_check(self, client: Codex) -> None: with client.health.with_streaming_response.check() as response: @@ -45,13 +45,13 @@ def test_streaming_response_check(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_db(self, client: Codex) -> None: health = client.health.db() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_db(self, client: Codex) -> None: response = client.health.with_raw_response.db() @@ -61,7 +61,7 @@ def test_raw_response_db(self, client: Codex) -> None: health = response.parse() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_db(self, client: Codex) -> None: with client.health.with_streaming_response.db() as response: @@ -79,13 +79,13 @@ class TestAsyncHealth: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_check(self, async_client: AsyncCodex) -> None: health = await async_client.health.check() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_check(self, async_client: AsyncCodex) -> None: response = await async_client.health.with_raw_response.check() @@ -95,7 +95,7 @@ async def test_raw_response_check(self, async_client: AsyncCodex) -> None: health = await response.parse() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_check(self, async_client: AsyncCodex) -> None: async with async_client.health.with_streaming_response.check() as response: @@ -107,13 +107,13 @@ async def test_streaming_response_check(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_db(self, async_client: AsyncCodex) -> None: health = await async_client.health.db() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_db(self, async_client: AsyncCodex) -> None: response = await async_client.health.with_raw_response.db() @@ -123,7 +123,7 @@ async def test_raw_response_db(self, async_client: AsyncCodex) -> None: health = await response.parse() assert_matches_type(HealthCheckResponse, health, path=["response"]) - @pytest.mark.skip() + 
@pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_db(self, async_client: AsyncCodex) -> None: async with async_client.health.with_streaming_response.db() as response: diff --git a/tests/api_resources/test_organizations.py b/tests/api_resources/test_organizations.py index eecdf3f7..9245bd89 100644 --- a/tests/api_resources/test_organizations.py +++ b/tests/api_resources/test_organizations.py @@ -21,7 +21,7 @@ class TestOrganizations: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: organization = client.organizations.retrieve( @@ -29,7 +29,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(OrganizationSchemaPublic, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.organizations.with_raw_response.retrieve( @@ -41,7 +41,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: organization = response.parse() assert_matches_type(OrganizationSchemaPublic, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.organizations.with_streaming_response.retrieve( @@ -55,7 +55,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -63,7 +63,7 @@ def test_path_params_retrieve(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_members(self, client: Codex) -> None: organization = client.organizations.list_members( @@ -71,7 +71,7 @@ def test_method_list_members(self, client: Codex) -> None: ) assert_matches_type(OrganizationListMembersResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list_members(self, client: Codex) -> None: response = client.organizations.with_raw_response.list_members( @@ -83,7 +83,7 @@ def test_raw_response_list_members(self, client: Codex) -> None: organization = response.parse() assert_matches_type(OrganizationListMembersResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list_members(self, client: Codex) -> None: with client.organizations.with_streaming_response.list_members( @@ -97,7 +97,7 @@ def test_streaming_response_list_members(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_list_members(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -105,7 +105,7 @@ def test_path_params_list_members(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + 
@pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve_permissions(self, client: Codex) -> None: organization = client.organizations.retrieve_permissions( @@ -113,7 +113,7 @@ def test_method_retrieve_permissions(self, client: Codex) -> None: ) assert_matches_type(OrganizationRetrievePermissionsResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve_permissions(self, client: Codex) -> None: response = client.organizations.with_raw_response.retrieve_permissions( @@ -125,7 +125,7 @@ def test_raw_response_retrieve_permissions(self, client: Codex) -> None: organization = response.parse() assert_matches_type(OrganizationRetrievePermissionsResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve_permissions(self, client: Codex) -> None: with client.organizations.with_streaming_response.retrieve_permissions( @@ -139,7 +139,7 @@ def test_streaming_response_retrieve_permissions(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve_permissions(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -153,7 +153,7 @@ class TestAsyncOrganizations: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: organization = await async_client.organizations.retrieve( @@ -161,7 +161,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationSchemaPublic, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.with_raw_response.retrieve( @@ -173,7 +173,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: organization = await response.parse() assert_matches_type(OrganizationSchemaPublic, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.organizations.with_streaming_response.retrieve( @@ -187,7 +187,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -195,7 +195,7 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_members(self, async_client: AsyncCodex) -> None: organization = await async_client.organizations.list_members( @@ -203,7 +203,7 @@ async def test_method_list_members(self, 
async_client: AsyncCodex) -> None: ) assert_matches_type(OrganizationListMembersResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list_members(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.with_raw_response.list_members( @@ -215,7 +215,7 @@ async def test_raw_response_list_members(self, async_client: AsyncCodex) -> None organization = await response.parse() assert_matches_type(OrganizationListMembersResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list_members(self, async_client: AsyncCodex) -> None: async with async_client.organizations.with_streaming_response.list_members( @@ -229,7 +229,7 @@ async def test_streaming_response_list_members(self, async_client: AsyncCodex) - assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_list_members(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): @@ -237,7 +237,7 @@ async def test_path_params_list_members(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve_permissions(self, async_client: AsyncCodex) -> None: organization = await async_client.organizations.retrieve_permissions( @@ -245,7 +245,7 @@ async def test_method_retrieve_permissions(self, async_client: AsyncCodex) -> No ) assert_matches_type(OrganizationRetrievePermissionsResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve_permissions(self, async_client: AsyncCodex) -> None: response = await async_client.organizations.with_raw_response.retrieve_permissions( @@ -257,7 +257,7 @@ async def test_raw_response_retrieve_permissions(self, async_client: AsyncCodex) organization = await response.parse() assert_matches_type(OrganizationRetrievePermissionsResponse, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve_permissions(self, async_client: AsyncCodex) -> None: async with async_client.organizations.with_streaming_response.retrieve_permissions( @@ -271,7 +271,7 @@ async def test_streaming_response_retrieve_permissions(self, async_client: Async assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve_permissions(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `organization_id` but received ''"): diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 7884db0f..04eef999 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -24,7 +24,7 @@ class TestProjects: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create(self, client: Codex) -> None: project = client.projects.create( @@ -34,7 
+34,7 @@ def test_method_create(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_create_with_all_params(self, client: Codex) -> None: project = client.projects.create( @@ -128,7 +128,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_create(self, client: Codex) -> None: response = client.projects.with_raw_response.create( @@ -142,7 +142,7 @@ def test_raw_response_create(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_create(self, client: Codex) -> None: with client.projects.with_streaming_response.create( @@ -158,7 +158,7 @@ def test_streaming_response_create(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: project = client.projects.retrieve( @@ -166,7 +166,7 @@ def test_method_retrieve(self, client: Codex) -> None: ) assert_matches_type(ProjectRetrieveResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.projects.with_raw_response.retrieve( @@ -178,7 +178,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectRetrieveResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.projects.with_streaming_response.retrieve( @@ -192,7 +192,7 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -200,7 +200,7 @@ def test_path_params_retrieve(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update(self, client: Codex) -> None: project = client.projects.update( @@ -208,7 +208,7 @@ def test_method_update(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_update_with_all_params(self, client: Codex) -> None: project = client.projects.update( @@ -302,7 +302,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_update(self, client: Codex) -> None: response = client.projects.with_raw_response.update( @@ -314,7 +314,7 @@ def test_raw_response_update(self, client: Codex) -> 
None: project = response.parse() assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_update(self, client: Codex) -> None: with client.projects.with_streaming_response.update( @@ -328,7 +328,7 @@ def test_streaming_response_update(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_update(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -336,13 +336,13 @@ def test_path_params_update(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> None: project = client.projects.list() assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list_with_all_params(self, client: Codex) -> None: project = client.projects.list( @@ -356,7 +356,7 @@ def test_method_list_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.with_raw_response.list() @@ -366,7 +366,7 @@ def test_raw_response_list(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.with_streaming_response.list() as response: @@ -378,7 +378,7 @@ def test_streaming_response_list(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_delete(self, client: Codex) -> None: project = client.projects.delete( @@ -386,7 +386,7 @@ def test_method_delete(self, client: Codex) -> None: ) assert project is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_delete(self, client: Codex) -> None: response = client.projects.with_raw_response.delete( @@ -398,7 +398,7 @@ def test_raw_response_delete(self, client: Codex) -> None: project = response.parse() assert project is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_delete(self, client: Codex) -> None: with client.projects.with_streaming_response.delete( @@ -412,7 +412,7 @@ def test_streaming_response_delete(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_delete(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -420,7 +420,7 @@ def test_path_params_delete(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_export(self, client: Codex) -> None: project = client.projects.export( @@ 
-428,7 +428,7 @@ def test_method_export(self, client: Codex) -> None: ) assert_matches_type(object, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_export(self, client: Codex) -> None: response = client.projects.with_raw_response.export( @@ -440,7 +440,7 @@ def test_raw_response_export(self, client: Codex) -> None: project = response.parse() assert_matches_type(object, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_export(self, client: Codex) -> None: with client.projects.with_streaming_response.export( @@ -454,7 +454,7 @@ def test_streaming_response_export(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_export(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -462,7 +462,7 @@ def test_path_params_export(self, client: Codex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_invite_sme(self, client: Codex) -> None: project = client.projects.invite_sme( @@ -473,7 +473,7 @@ def test_method_invite_sme(self, client: Codex) -> None: ) assert_matches_type(ProjectInviteSmeResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_invite_sme(self, client: Codex) -> None: response = client.projects.with_raw_response.invite_sme( @@ -488,7 +488,7 @@ def test_raw_response_invite_sme(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectInviteSmeResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_invite_sme(self, client: Codex) -> None: with client.projects.with_streaming_response.invite_sme( @@ -505,7 +505,7 @@ def test_streaming_response_invite_sme(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_invite_sme(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -516,7 +516,7 @@ def test_path_params_invite_sme(self, client: Codex) -> None: url_query_string="url_query_string", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve_analytics(self, client: Codex) -> None: project = client.projects.retrieve_analytics( @@ -524,7 +524,7 @@ def test_method_retrieve_analytics(self, client: Codex) -> None: ) assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve_analytics_with_all_params(self, client: Codex) -> None: project = client.projects.retrieve_analytics( @@ -534,7 +534,7 @@ def test_method_retrieve_analytics_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve_analytics(self, client: Codex) -> None: 
response = client.projects.with_raw_response.retrieve_analytics( @@ -546,7 +546,7 @@ def test_raw_response_retrieve_analytics(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve_analytics(self, client: Codex) -> None: with client.projects.with_streaming_response.retrieve_analytics( @@ -560,7 +560,7 @@ def test_streaming_response_retrieve_analytics(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_retrieve_analytics(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -568,7 +568,7 @@ def test_path_params_retrieve_analytics(self, client: Codex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_validate(self, client: Codex) -> None: project = client.projects.validate( @@ -579,7 +579,7 @@ def test_method_validate(self, client: Codex) -> None: ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_validate_with_all_params(self, client: Codex) -> None: project = client.projects.validate( @@ -617,6 +617,8 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: ], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -649,7 +651,7 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_validate(self, client: Codex) -> None: response = client.projects.with_raw_response.validate( @@ -664,7 +666,7 @@ def test_raw_response_validate(self, client: Codex) -> None: project = response.parse() assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_validate(self, client: Codex) -> None: with client.projects.with_streaming_response.validate( @@ -681,7 +683,7 @@ def test_streaming_response_validate(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_path_params_validate(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -698,7 +700,7 @@ class TestAsyncProjects: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_create(self, async_client: AsyncCodex) -> None: project = await async_client.projects.create( @@ -708,7 +710,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize 
async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.create( @@ -802,7 +804,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.create( @@ -816,7 +818,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_create(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.create( @@ -832,7 +834,7 @@ async def test_streaming_response_create(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: project = await async_client.projects.retrieve( @@ -840,7 +842,7 @@ async def test_method_retrieve(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectRetrieveResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.retrieve( @@ -852,7 +854,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectRetrieveResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.retrieve( @@ -866,7 +868,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -874,7 +876,7 @@ async def test_path_params_retrieve(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( @@ -882,7 +884,7 @@ async def test_method_update(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.update( @@ -976,7 +978,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def 
test_raw_response_update(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.update( @@ -988,7 +990,7 @@ async def test_raw_response_update(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectReturnSchema, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_update(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.update( @@ -1002,7 +1004,7 @@ async def test_streaming_response_update(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_update(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1010,13 +1012,13 @@ async def test_path_params_update(self, async_client: AsyncCodex) -> None: project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: project = await async_client.projects.list() assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.list( @@ -1030,7 +1032,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No ) assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.list() @@ -1040,7 +1042,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectListResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.list() as response: @@ -1052,7 +1054,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_delete(self, async_client: AsyncCodex) -> None: project = await async_client.projects.delete( @@ -1060,7 +1062,7 @@ async def test_method_delete(self, async_client: AsyncCodex) -> None: ) assert project is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.delete( @@ -1072,7 +1074,7 @@ async def test_raw_response_delete(self, async_client: AsyncCodex) -> None: project = await response.parse() assert project is None - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_delete(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.delete( @@ -1086,7 +1088,7 @@ async 
def test_streaming_response_delete(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_delete(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1094,7 +1096,7 @@ async def test_path_params_delete(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_export(self, async_client: AsyncCodex) -> None: project = await async_client.projects.export( @@ -1102,7 +1104,7 @@ async def test_method_export(self, async_client: AsyncCodex) -> None: ) assert_matches_type(object, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_export(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.export( @@ -1114,7 +1116,7 @@ async def test_raw_response_export(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(object, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_export(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.export( @@ -1128,7 +1130,7 @@ async def test_streaming_response_export(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_export(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1136,7 +1138,7 @@ async def test_path_params_export(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_invite_sme(self, async_client: AsyncCodex) -> None: project = await async_client.projects.invite_sme( @@ -1147,7 +1149,7 @@ async def test_method_invite_sme(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectInviteSmeResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_invite_sme(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.invite_sme( @@ -1162,7 +1164,7 @@ async def test_raw_response_invite_sme(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectInviteSmeResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_invite_sme(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.invite_sme( @@ -1179,7 +1181,7 @@ async def test_streaming_response_invite_sme(self, async_client: AsyncCodex) -> assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_invite_sme(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1190,7 +1192,7 @@ async def 
test_path_params_invite_sme(self, async_client: AsyncCodex) -> None: url_query_string="url_query_string", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve_analytics(self, async_client: AsyncCodex) -> None: project = await async_client.projects.retrieve_analytics( @@ -1198,7 +1200,7 @@ async def test_method_retrieve_analytics(self, async_client: AsyncCodex) -> None ) assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve_analytics_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.retrieve_analytics( @@ -1208,7 +1210,7 @@ async def test_method_retrieve_analytics_with_all_params(self, async_client: Asy ) assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve_analytics(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.retrieve_analytics( @@ -1220,7 +1222,7 @@ async def test_raw_response_retrieve_analytics(self, async_client: AsyncCodex) - project = await response.parse() assert_matches_type(ProjectRetrieveAnalyticsResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve_analytics(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.retrieve_analytics( @@ -1234,7 +1236,7 @@ async def test_streaming_response_retrieve_analytics(self, async_client: AsyncCo assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_retrieve_analytics(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): @@ -1242,7 +1244,7 @@ async def test_path_params_retrieve_analytics(self, async_client: AsyncCodex) -> project_id="", ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_validate(self, async_client: AsyncCodex) -> None: project = await async_client.projects.validate( @@ -1253,7 +1255,7 @@ async def test_method_validate(self, async_client: AsyncCodex) -> None: ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_validate_with_all_params(self, async_client: AsyncCodex) -> None: project = await async_client.projects.validate( @@ -1291,6 +1293,8 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - ], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -1323,7 +1327,7 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_validate(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.validate( @@ -1338,7 +1342,7 @@ async def 
test_raw_response_validate(self, async_client: AsyncCodex) -> None: project = await response.parse() assert_matches_type(ProjectValidateResponse, project, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_validate(self, async_client: AsyncCodex) -> None: async with async_client.projects.with_streaming_response.validate( @@ -1355,7 +1359,7 @@ async def test_streaming_response_validate(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_path_params_validate(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py index da0a9ad1..6c8c1770 100644 --- a/tests/api_resources/test_tlm.py +++ b/tests/api_resources/test_tlm.py @@ -17,7 +17,7 @@ class TestTlm: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_prompt(self, client: Codex) -> None: tlm = client.tlm.prompt( @@ -25,7 +25,7 @@ def test_method_prompt(self, client: Codex) -> None: ) assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_prompt_with_all_params(self, client: Codex) -> None: tlm = client.tlm.prompt( @@ -33,6 +33,8 @@ def test_method_prompt_with_all_params(self, client: Codex) -> None: constrain_outputs=["string"], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -48,7 +50,7 @@ def test_method_prompt_with_all_params(self, client: Codex) -> None: ) assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_prompt(self, client: Codex) -> None: response = client.tlm.with_raw_response.prompt( @@ -60,7 +62,7 @@ def test_raw_response_prompt(self, client: Codex) -> None: tlm = response.parse() assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_prompt(self, client: Codex) -> None: with client.tlm.with_streaming_response.prompt( @@ -74,7 +76,7 @@ def test_streaming_response_prompt(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_score(self, client: Codex) -> None: tlm = client.tlm.score( @@ -83,7 +85,7 @@ def test_method_score(self, client: Codex) -> None: ) assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_score_with_all_params(self, client: Codex) -> None: tlm = client.tlm.score( @@ -92,6 +94,8 @@ def test_method_score_with_all_params(self, client: Codex) -> None: constrain_outputs=["string"], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -107,7 +111,7 @@ def 
test_method_score_with_all_params(self, client: Codex) -> None: ) assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_score(self, client: Codex) -> None: response = client.tlm.with_raw_response.score( @@ -120,7 +124,7 @@ def test_raw_response_score(self, client: Codex) -> None: tlm = response.parse() assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_score(self, client: Codex) -> None: with client.tlm.with_streaming_response.score( @@ -141,7 +145,7 @@ class TestAsyncTlm: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_prompt(self, async_client: AsyncCodex) -> None: tlm = await async_client.tlm.prompt( @@ -149,7 +153,7 @@ async def test_method_prompt(self, async_client: AsyncCodex) -> None: ) assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> None: tlm = await async_client.tlm.prompt( @@ -157,6 +161,8 @@ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> constrain_outputs=["string"], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -172,7 +178,7 @@ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> ) assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_prompt(self, async_client: AsyncCodex) -> None: response = await async_client.tlm.with_raw_response.prompt( @@ -184,7 +190,7 @@ async def test_raw_response_prompt(self, async_client: AsyncCodex) -> None: tlm = await response.parse() assert_matches_type(TlmPromptResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_prompt(self, async_client: AsyncCodex) -> None: async with async_client.tlm.with_streaming_response.prompt( @@ -198,7 +204,7 @@ async def test_streaming_response_prompt(self, async_client: AsyncCodex) -> None assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_score(self, async_client: AsyncCodex) -> None: tlm = await async_client.tlm.score( @@ -207,7 +213,7 @@ async def test_method_score(self, async_client: AsyncCodex) -> None: ) assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> None: tlm = await async_client.tlm.score( @@ -216,6 +222,8 @@ async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> N constrain_outputs=["string"], options={ "custom_eval_criteria": [{}], + "disable_persistence": True, + "disable_trustworthiness": True, "log": ["string"], "max_tokens": 0, "model": "model", @@ -231,7 +239,7 @@ async def 
test_method_score_with_all_params(self, async_client: AsyncCodex) -> N ) assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_score(self, async_client: AsyncCodex) -> None: response = await async_client.tlm.with_raw_response.score( @@ -244,7 +252,7 @@ async def test_raw_response_score(self, async_client: AsyncCodex) -> None: tlm = await response.parse() assert_matches_type(TlmScoreResponse, tlm, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_score(self, async_client: AsyncCodex) -> None: async with async_client.tlm.with_streaming_response.score( diff --git a/tests/api_resources/test_users.py b/tests/api_resources/test_users.py index 661ee559..4526254d 100644 --- a/tests/api_resources/test_users.py +++ b/tests/api_resources/test_users.py @@ -18,7 +18,7 @@ class TestUsers: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_activate_account(self, client: Codex) -> None: user = client.users.activate_account( @@ -27,7 +27,7 @@ def test_method_activate_account(self, client: Codex) -> None: ) assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_activate_account_with_all_params(self, client: Codex) -> None: user = client.users.activate_account( @@ -41,7 +41,7 @@ def test_method_activate_account_with_all_params(self, client: Codex) -> None: ) assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_activate_account(self, client: Codex) -> None: response = client.users.with_raw_response.activate_account( @@ -54,7 +54,7 @@ def test_raw_response_activate_account(self, client: Codex) -> None: user = response.parse() assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_activate_account(self, client: Codex) -> None: with client.users.with_streaming_response.activate_account( @@ -75,7 +75,7 @@ class TestAsyncUsers: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_activate_account(self, async_client: AsyncCodex) -> None: user = await async_client.users.activate_account( @@ -84,7 +84,7 @@ async def test_method_activate_account(self, async_client: AsyncCodex) -> None: ) assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_activate_account_with_all_params(self, async_client: AsyncCodex) -> None: user = await async_client.users.activate_account( @@ -98,7 +98,7 @@ async def test_method_activate_account_with_all_params(self, async_client: Async ) assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_activate_account(self, async_client: AsyncCodex) -> None: response = 
await async_client.users.with_raw_response.activate_account( @@ -111,7 +111,7 @@ async def test_raw_response_activate_account(self, async_client: AsyncCodex) -> user = await response.parse() assert_matches_type(UserSchemaPublic, user, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_activate_account(self, async_client: AsyncCodex) -> None: async with async_client.users.with_streaming_response.activate_account( diff --git a/tests/api_resources/users/myself/test_api_key.py b/tests/api_resources/users/myself/test_api_key.py index f0a7ccf7..65cff9ad 100644 --- a/tests/api_resources/users/myself/test_api_key.py +++ b/tests/api_resources/users/myself/test_api_key.py @@ -17,13 +17,13 @@ class TestAPIKey: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: api_key = client.users.myself.api_key.retrieve() assert_matches_type(UserSchemaPublic, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.users.myself.api_key.with_raw_response.retrieve() @@ -33,7 +33,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: api_key = response.parse() assert_matches_type(UserSchemaPublic, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.users.myself.api_key.with_streaming_response.retrieve() as response: @@ -45,13 +45,13 @@ def test_streaming_response_retrieve(self, client: Codex) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_refresh(self, client: Codex) -> None: api_key = client.users.myself.api_key.refresh() assert_matches_type(UserSchema, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_refresh(self, client: Codex) -> None: response = client.users.myself.api_key.with_raw_response.refresh() @@ -61,7 +61,7 @@ def test_raw_response_refresh(self, client: Codex) -> None: api_key = response.parse() assert_matches_type(UserSchema, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_refresh(self, client: Codex) -> None: with client.users.myself.api_key.with_streaming_response.refresh() as response: @@ -79,13 +79,13 @@ class TestAsyncAPIKey: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: api_key = await async_client.users.myself.api_key.retrieve() assert_matches_type(UserSchemaPublic, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.users.myself.api_key.with_raw_response.retrieve() @@ -95,7 +95,7 @@ async def test_raw_response_retrieve(self, 
async_client: AsyncCodex) -> None: api_key = await response.parse() assert_matches_type(UserSchemaPublic, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.users.myself.api_key.with_streaming_response.retrieve() as response: @@ -107,13 +107,13 @@ async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> No assert cast(Any, response.is_closed) is True - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_refresh(self, async_client: AsyncCodex) -> None: api_key = await async_client.users.myself.api_key.refresh() assert_matches_type(UserSchema, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_refresh(self, async_client: AsyncCodex) -> None: response = await async_client.users.myself.api_key.with_raw_response.refresh() @@ -123,7 +123,7 @@ async def test_raw_response_refresh(self, async_client: AsyncCodex) -> None: api_key = await response.parse() assert_matches_type(UserSchema, api_key, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_refresh(self, async_client: AsyncCodex) -> None: async with async_client.users.myself.api_key.with_streaming_response.refresh() as response: diff --git a/tests/api_resources/users/myself/test_organizations.py b/tests/api_resources/users/myself/test_organizations.py index fd377ea0..d30c0b60 100644 --- a/tests/api_resources/users/myself/test_organizations.py +++ b/tests/api_resources/users/myself/test_organizations.py @@ -17,13 +17,13 @@ class TestOrganizations: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_list(self, client: Codex) -> None: organization = client.users.myself.organizations.list() assert_matches_type(UserOrganizationsSchema, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.users.myself.organizations.with_raw_response.list() @@ -33,7 +33,7 @@ def test_raw_response_list(self, client: Codex) -> None: organization = response.parse() assert_matches_type(UserOrganizationsSchema, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.users.myself.organizations.with_streaming_response.list() as response: @@ -51,13 +51,13 @@ class TestAsyncOrganizations: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: organization = await async_client.users.myself.organizations.list() assert_matches_type(UserOrganizationsSchema, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_list(self, async_client: AsyncCodex) -> None: response = await 
async_client.users.myself.organizations.with_raw_response.list() @@ -67,7 +67,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: organization = await response.parse() assert_matches_type(UserOrganizationsSchema, organization, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.users.myself.organizations.with_streaming_response.list() as response: diff --git a/tests/api_resources/users/test_myself.py b/tests/api_resources/users/test_myself.py index 1c56b0be..f86d54cb 100644 --- a/tests/api_resources/users/test_myself.py +++ b/tests/api_resources/users/test_myself.py @@ -17,13 +17,13 @@ class TestMyself: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_retrieve(self, client: Codex) -> None: myself = client.users.myself.retrieve() assert_matches_type(UserSchemaPublic, myself, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_retrieve(self, client: Codex) -> None: response = client.users.myself.with_raw_response.retrieve() @@ -33,7 +33,7 @@ def test_raw_response_retrieve(self, client: Codex) -> None: myself = response.parse() assert_matches_type(UserSchemaPublic, myself, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_retrieve(self, client: Codex) -> None: with client.users.myself.with_streaming_response.retrieve() as response: @@ -51,13 +51,13 @@ class TestAsyncMyself: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_retrieve(self, async_client: AsyncCodex) -> None: myself = await async_client.users.myself.retrieve() assert_matches_type(UserSchemaPublic, myself, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: response = await async_client.users.myself.with_raw_response.retrieve() @@ -67,7 +67,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncCodex) -> None: myself = await response.parse() assert_matches_type(UserSchemaPublic, myself, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncCodex) -> None: async with async_client.users.myself.with_streaming_response.retrieve() as response: diff --git a/tests/api_resources/users/test_verification.py b/tests/api_resources/users/test_verification.py index fbf6b667..3fca3582 100644 --- a/tests/api_resources/users/test_verification.py +++ b/tests/api_resources/users/test_verification.py @@ -17,13 +17,13 @@ class TestVerification: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_method_resend(self, client: Codex) -> None: verification = client.users.verification.resend() assert_matches_type(VerificationResendResponse, verification, 
path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_raw_response_resend(self, client: Codex) -> None: response = client.users.verification.with_raw_response.resend() @@ -33,7 +33,7 @@ def test_raw_response_resend(self, client: Codex) -> None: verification = response.parse() assert_matches_type(VerificationResendResponse, verification, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize def test_streaming_response_resend(self, client: Codex) -> None: with client.users.verification.with_streaming_response.resend() as response: @@ -51,13 +51,13 @@ class TestAsyncVerification: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_method_resend(self, async_client: AsyncCodex) -> None: verification = await async_client.users.verification.resend() assert_matches_type(VerificationResendResponse, verification, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_raw_response_resend(self, async_client: AsyncCodex) -> None: response = await async_client.users.verification.with_raw_response.resend() @@ -67,7 +67,7 @@ async def test_raw_response_resend(self, async_client: AsyncCodex) -> None: verification = await response.parse() assert_matches_type(VerificationResendResponse, verification, path=["response"]) - @pytest.mark.skip() + @pytest.mark.skip(reason="Prism tests are disabled") @parametrize async def test_streaming_response_resend(self, async_client: AsyncCodex) -> None: async with async_client.users.verification.with_streaming_response.resend() as response: diff --git a/tests/conftest.py b/tests/conftest.py index 3472c36d..ed898e38 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,6 +45,8 @@ def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") +auth_token = "My Auth Token" + @pytest.fixture(scope="session") def client(request: FixtureRequest) -> Iterator[Codex]: @@ -52,7 +54,7 @@ def client(request: FixtureRequest) -> Iterator[Codex]: if not isinstance(strict, bool): raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") - with Codex(base_url=base_url, _strict_response_validation=strict) as client: + with Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=strict) as client: yield client @@ -76,5 +78,7 @@ async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncCodex]: else: raise TypeError(f"Unexpected fixture parameter type {type(param)}, expected bool or dict") - async with AsyncCodex(base_url=base_url, _strict_response_validation=strict, http_client=http_client) as client: + async with AsyncCodex( + base_url=base_url, auth_token=auth_token, _strict_response_validation=strict, http_client=http_client + ) as client: yield client diff --git a/tests/test_client.py b/tests/test_client.py index 24749152..cdc717fd 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -37,6 +37,7 @@ from .utils import update_env base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") +auth_token = "My Auth Token" def _get_params(client: BaseClient[Any, Any]) -> dict[str, str]: @@ -58,7 +59,7 @@ def _get_open_connections(client: Codex | AsyncCodex) -> int: class TestCodex: 
-    client = Codex(base_url=base_url, _strict_response_validation=True)
+    client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

     @pytest.mark.respx(base_url=base_url)
     def test_raw_response(self, respx_mock: MockRouter) -> None:
@@ -84,6 +85,10 @@ def test_copy(self) -> None:
         copied = self.client.copy()
         assert id(copied) != id(self.client)

+        copied = self.client.copy(auth_token="another My Auth Token")
+        assert copied.auth_token == "another My Auth Token"
+        assert self.client.auth_token == "My Auth Token"
+
     def test_copy_default_options(self) -> None:
         # options that have a default are overridden correctly
         copied = self.client.copy(max_retries=7)
@@ -101,7 +106,9 @@ def test_copy_default_options(self) -> None:
         assert isinstance(self.client.timeout, httpx.Timeout)

     def test_copy_default_headers(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
+        client = Codex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+        )
         assert client.default_headers["X-Foo"] == "bar"

         # does not override the already given value when not specified
@@ -133,7 +140,9 @@ def test_copy_default_headers(self) -> None:
             client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})

     def test_copy_default_query(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"})
+        client = Codex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_query={"foo": "bar"}
+        )
         assert _get_params(client)["foo"] == "bar"

         # does not override the already given value when not specified
@@ -257,7 +266,9 @@ def test_request_timeout(self) -> None:
         assert timeout == httpx.Timeout(100.0)

     def test_client_timeout_option(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True, timeout=httpx.Timeout(0))
+        client = Codex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, timeout=httpx.Timeout(0)
+        )

         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -266,7 +277,9 @@ def test_http_client_timeout_option(self) -> None:
         # custom timeout given to the httpx client should be used
         with httpx.Client(timeout=None) as http_client:
-            client = Codex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = Codex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -274,7 +287,9 @@ def test_http_client_timeout_option(self) -> None:

         # no timeout given to the httpx client should not use the httpx default
         with httpx.Client() as http_client:
-            client = Codex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = Codex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -282,7 +297,9 @@ def test_http_client_timeout_option(self) -> None:

         # explicitly passing the default timeout currently results in it being ignored
         with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
-            client = Codex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = Codex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -291,16 +308,24 @@ def test_http_client_timeout_option(self) -> None:
     async def test_invalid_http_client(self) -> None:
         with pytest.raises(TypeError, match="Invalid `http_client` arg"):
             async with httpx.AsyncClient() as http_client:
-                Codex(base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client))
+                Codex(
+                    base_url=base_url,
+                    auth_token=auth_token,
+                    _strict_response_validation=True,
+                    http_client=cast(Any, http_client),
+                )

     def test_default_headers_option(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
+        client = Codex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+        )
         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         assert request.headers.get("x-foo") == "bar"
         assert request.headers.get("x-stainless-lang") == "python"

         client2 = Codex(
             base_url=base_url,
+            auth_token=auth_token,
             _strict_response_validation=True,
             default_headers={
                 "X-Foo": "stainless",
@@ -312,7 +337,12 @@ def test_default_headers_option(self) -> None:
         assert request.headers.get("x-stainless-lang") == "my-overriding-header"

     def test_default_query_option(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"})
+        client = Codex(
+            base_url=base_url,
+            auth_token=auth_token,
+            _strict_response_validation=True,
+            default_query={"query_param": "bar"},
+        )
         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         url = httpx.URL(request.url)
         assert dict(url.params) == {"query_param": "bar"}
@@ -511,7 +541,9 @@ class Model(BaseModel):
         assert response.foo == 2

     def test_base_url_setter(self) -> None:
-        client = Codex(base_url="https://example.com/from_init", _strict_response_validation=True)
+        client = Codex(
+            base_url="https://example.com/from_init", auth_token=auth_token, _strict_response_validation=True
+        )
         assert client.base_url == "https://example.com/from_init/"

         client.base_url = "https://example.com/from_setter"  # type: ignore[assignment]
@@ -520,23 +552,28 @@ def test_base_url_setter(self) -> None:

     def test_base_url_env(self) -> None:
         with update_env(CODEX_BASE_URL="http://localhost:5000/from/env"):
-            client = Codex(_strict_response_validation=True)
+            client = Codex(auth_token=auth_token, _strict_response_validation=True)
             assert client.base_url == "http://localhost:5000/from/env/"

         # explicit environment arg requires explicitness
         with update_env(CODEX_BASE_URL="http://localhost:5000/from/env"):
             with pytest.raises(ValueError, match=r"you must pass base_url=None"):
-                Codex(_strict_response_validation=True, environment="production")
+                Codex(auth_token=auth_token, _strict_response_validation=True, environment="production")

-            client = Codex(base_url=None, _strict_response_validation=True, environment="production")
+            client = Codex(
+                base_url=None, auth_token=auth_token, _strict_response_validation=True, environment="production"
+            )
             assert str(client.base_url).startswith("https://api-codex.cleanlab.ai")

     @pytest.mark.parametrize(
         "client",
         [
-            Codex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            Codex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             Codex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.Client(),
             ),
@@ -556,9 +593,12 @@ def test_base_url_trailing_slash(self, client: Codex) -> None:
     @pytest.mark.parametrize(
         "client",
         [
-            Codex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            Codex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             Codex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.Client(),
             ),
@@ -578,9 +618,12 @@ def test_base_url_no_trailing_slash(self, client: Codex) -> None:
     @pytest.mark.parametrize(
         "client",
         [
-            Codex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            Codex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             Codex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.Client(),
             ),
@@ -598,7 +641,7 @@ def test_absolute_request_url(self, client: Codex) -> None:
         assert request.url == "https://myapi.com/foo"

     def test_copied_client_does_not_close_http(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True)
+        client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)
         assert not client.is_closed()

         copied = client.copy()
@@ -609,7 +652,7 @@ def test_copied_client_does_not_close_http(self) -> None:
         assert not client.is_closed()

     def test_client_context_manager(self) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True)
+        client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)
         with client as c2:
             assert c2 is client
             assert not c2.is_closed()
@@ -630,7 +673,9 @@ class Model(BaseModel):

     def test_client_max_retries_validation(self) -> None:
         with pytest.raises(TypeError, match=r"max_retries cannot be None"):
-            Codex(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None))
+            Codex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, max_retries=cast(Any, None)
+            )

     @pytest.mark.respx(base_url=base_url)
     def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None:
@@ -639,12 +684,12 @@ class Model(BaseModel):

         respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format"))

-        strict_client = Codex(base_url=base_url, _strict_response_validation=True)
+        strict_client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

         with pytest.raises(APIResponseValidationError):
             strict_client.get("/foo", cast_to=Model)

-        client = Codex(base_url=base_url, _strict_response_validation=False)
+        client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=False)

         response = client.get("/foo", cast_to=Model)
         assert isinstance(response, str)  # type: ignore[unreachable]
@@ -672,7 +717,7 @@ class Model(BaseModel):
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
     def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
-        client = Codex(base_url=base_url, _strict_response_validation=True)
+        client = Codex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

         headers = httpx.Headers({"retry-after": retry_after})
         options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
@@ -840,7 +885,7 @@ def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None:


 class TestAsyncCodex:
-    client = AsyncCodex(base_url=base_url, _strict_response_validation=True)
+    client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

     @pytest.mark.respx(base_url=base_url)
     @pytest.mark.asyncio
@@ -868,6 +913,10 @@ def test_copy(self) -> None:
         copied = self.client.copy()
         assert id(copied) != id(self.client)

+        copied = self.client.copy(auth_token="another My Auth Token")
+        assert copied.auth_token == "another My Auth Token"
+        assert self.client.auth_token == "My Auth Token"
+
     def test_copy_default_options(self) -> None:
         # options that have a default are overridden correctly
         copied = self.client.copy(max_retries=7)
@@ -885,7 +934,9 @@ def test_copy_default_options(self) -> None:
         assert isinstance(self.client.timeout, httpx.Timeout)

     def test_copy_default_headers(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
+        client = AsyncCodex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+        )
         assert client.default_headers["X-Foo"] == "bar"

         # does not override the already given value when not specified
@@ -917,7 +968,9 @@ def test_copy_default_headers(self) -> None:
             client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"})

     def test_copy_default_query(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"})
+        client = AsyncCodex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_query={"foo": "bar"}
+        )
         assert _get_params(client)["foo"] == "bar"

         # does not override the already given value when not specified
@@ -1041,7 +1094,9 @@ async def test_request_timeout(self) -> None:
         assert timeout == httpx.Timeout(100.0)

     async def test_client_timeout_option(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True, timeout=httpx.Timeout(0))
+        client = AsyncCodex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, timeout=httpx.Timeout(0)
+        )

         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -1050,7 +1105,9 @@ async def test_client_timeout_option(self) -> None:
     async def test_http_client_timeout_option(self) -> None:
         # custom timeout given to the httpx client should be used
         async with httpx.AsyncClient(timeout=None) as http_client:
-            client = AsyncCodex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = AsyncCodex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -1058,7 +1115,9 @@ async def test_http_client_timeout_option(self) -> None:

         # no timeout given to the httpx client should not use the httpx default
         async with httpx.AsyncClient() as http_client:
-            client = AsyncCodex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = AsyncCodex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -1066,7 +1125,9 @@ async def test_http_client_timeout_option(self) -> None:

         # explicitly passing the default timeout currently results in it being ignored
         async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client:
-            client = AsyncCodex(base_url=base_url, _strict_response_validation=True, http_client=http_client)
+            client = AsyncCodex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, http_client=http_client
+            )

             request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
             timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -1075,16 +1136,24 @@ def test_invalid_http_client(self) -> None:
         with pytest.raises(TypeError, match="Invalid `http_client` arg"):
             with httpx.Client() as http_client:
-                AsyncCodex(base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client))
+                AsyncCodex(
+                    base_url=base_url,
+                    auth_token=auth_token,
+                    _strict_response_validation=True,
+                    http_client=cast(Any, http_client),
+                )

     def test_default_headers_option(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"})
+        client = AsyncCodex(
+            base_url=base_url, auth_token=auth_token, _strict_response_validation=True, default_headers={"X-Foo": "bar"}
+        )
         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         assert request.headers.get("x-foo") == "bar"
         assert request.headers.get("x-stainless-lang") == "python"

         client2 = AsyncCodex(
             base_url=base_url,
+            auth_token=auth_token,
             _strict_response_validation=True,
             default_headers={
                 "X-Foo": "stainless",
@@ -1096,7 +1165,12 @@ def test_default_headers_option(self) -> None:
         assert request.headers.get("x-stainless-lang") == "my-overriding-header"

     def test_default_query_option(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"})
+        client = AsyncCodex(
+            base_url=base_url,
+            auth_token=auth_token,
+            _strict_response_validation=True,
+            default_query={"query_param": "bar"},
+        )
         request = client._build_request(FinalRequestOptions(method="get", url="/foo"))
         url = httpx.URL(request.url)
         assert dict(url.params) == {"query_param": "bar"}
@@ -1295,7 +1369,9 @@ class Model(BaseModel):
         assert response.foo == 2

     def test_base_url_setter(self) -> None:
-        client = AsyncCodex(base_url="https://example.com/from_init", _strict_response_validation=True)
+        client = AsyncCodex(
+            base_url="https://example.com/from_init", auth_token=auth_token, _strict_response_validation=True
+        )
         assert client.base_url == "https://example.com/from_init/"

         client.base_url = "https://example.com/from_setter"  # type: ignore[assignment]
@@ -1304,23 +1380,28 @@ def test_base_url_setter(self) -> None:

     def test_base_url_env(self) -> None:
         with update_env(CODEX_BASE_URL="http://localhost:5000/from/env"):
-            client = AsyncCodex(_strict_response_validation=True)
+            client = AsyncCodex(auth_token=auth_token, _strict_response_validation=True)
             assert client.base_url == "http://localhost:5000/from/env/"

         # explicit environment arg requires explicitness
         with update_env(CODEX_BASE_URL="http://localhost:5000/from/env"):
             with pytest.raises(ValueError, match=r"you must pass base_url=None"):
-                AsyncCodex(_strict_response_validation=True, environment="production")
+                AsyncCodex(auth_token=auth_token, _strict_response_validation=True, environment="production")

-            client = AsyncCodex(base_url=None, _strict_response_validation=True, environment="production")
+            client = AsyncCodex(
+                base_url=None, auth_token=auth_token, _strict_response_validation=True, environment="production"
+            )
             assert str(client.base_url).startswith("https://api-codex.cleanlab.ai")

     @pytest.mark.parametrize(
         "client",
         [
-            AsyncCodex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            AsyncCodex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             AsyncCodex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.AsyncClient(),
             ),
@@ -1340,9 +1421,12 @@ def test_base_url_trailing_slash(self, client: AsyncCodex) -> None:
     @pytest.mark.parametrize(
         "client",
         [
-            AsyncCodex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            AsyncCodex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             AsyncCodex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.AsyncClient(),
             ),
@@ -1362,9 +1446,12 @@ def test_base_url_no_trailing_slash(self, client: AsyncCodex) -> None:
     @pytest.mark.parametrize(
         "client",
         [
-            AsyncCodex(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True),
+            AsyncCodex(
+                base_url="http://localhost:5000/custom/path/", auth_token=auth_token, _strict_response_validation=True
+            ),
             AsyncCodex(
                 base_url="http://localhost:5000/custom/path/",
+                auth_token=auth_token,
                 _strict_response_validation=True,
                 http_client=httpx.AsyncClient(),
             ),
@@ -1382,7 +1469,7 @@ def test_absolute_request_url(self, client: AsyncCodex) -> None:
         assert request.url == "https://myapi.com/foo"

     async def test_copied_client_does_not_close_http(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True)
+        client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)
         assert not client.is_closed()

         copied = client.copy()
@@ -1394,7 +1481,7 @@ async def test_copied_client_does_not_close_http(self) -> None:
         assert not client.is_closed()

     async def test_client_context_manager(self) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True)
+        client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)
         async with client as c2:
             assert c2 is client
             assert not c2.is_closed()
@@ -1416,7 +1503,9 @@ class Model(BaseModel):

     async def test_client_max_retries_validation(self) -> None:
         with pytest.raises(TypeError, match=r"max_retries cannot be None"):
-            AsyncCodex(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None))
+            AsyncCodex(
+                base_url=base_url, auth_token=auth_token, _strict_response_validation=True, max_retries=cast(Any, None)
+            )

     @pytest.mark.respx(base_url=base_url)
     @pytest.mark.asyncio
@@ -1426,12 +1515,12 @@ class Model(BaseModel):

         respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format"))

-        strict_client = AsyncCodex(base_url=base_url, _strict_response_validation=True)
+        strict_client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

         with pytest.raises(APIResponseValidationError):
             await strict_client.get("/foo", cast_to=Model)

-        client = AsyncCodex(base_url=base_url, _strict_response_validation=False)
+        client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=False)

         response = await client.get("/foo", cast_to=Model)
         assert isinstance(response, str)  # type: ignore[unreachable]
@@ -1460,7 +1549,7 @@ class Model(BaseModel):
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
     @pytest.mark.asyncio
     async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None:
-        client = AsyncCodex(base_url=base_url, _strict_response_validation=True)
+        client = AsyncCodex(base_url=base_url, auth_token=auth_token, _strict_response_validation=True)

         headers = httpx.Headers({"retry-after": retry_after})
         options = FinalRequestOptions(method="get", url="/foo", max_retries=3)