Set lower and upper limits of utilization_policy.time_window (#2386)

un-def · web-flow · commit 03ceb16365ec · 2025-03-05T11:20:14.000Z
Fixes: #2384
diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md
@@ -18,6 +18,14 @@ The `dev-environment` configuration type allows running [dev environments](../..
       type:
         required: true
 
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+    overrides:
+      show_root_heading: false
+      type:
+        required: true
+
 ### `resources`
 
 #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md
@@ -24,31 +24,31 @@ The `service` configuration type allows running [services](../../concepts/servic
 
     > TGI provides an OpenAI-compatible API starting with version 1.4.0,
     so models served by TGI can be defined with `format: openai` too.
-    
+
     #SCHEMA# dstack.api.TGIChatModel
         overrides:
           show_root_heading: false
           type:
             required: true
 
     ??? info "Chat template"
-    
+
         By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating)
         from the model's repository. If it is not present there, manual configuration is required.
-    
+
         ```yaml
         type: service
-    
+
         image: ghcr.io/huggingface/text-generation-inference:latest
         env:
           - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ
         commands:
           - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq
         port: 8000
-    
+
         resources:
           gpu: 80GB
-    
+
         # Enable the OpenAI-compatible endpoint
         model:
           type: chat
@@ -57,13 +57,13 @@ The `service` configuration type allows running [services](../../concepts/servic
           chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' </s>' }}{% endif %}{% endfor %}"
           eos_token: "</s>"
         ```
-    
+
         Please note that model mapping is an experimental feature with the following limitations:
-    
+
         1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself.
         2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template).
-    
-        If you encounter any other issues, please make sure to file a 
+
+        If you encounter any other issues, please make sure to file a
         [GitHub issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/new/choose){:target="_blank"}.
 
 ### `scaling`
@@ -80,6 +80,14 @@ The `service` configuration type allows running [services](../../concepts/servic
     overrides:
       show_root_heading: false
 
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+    overrides:
+      show_root_heading: false
+      type:
+        required: true
+
 ### `resources`
 
 #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md
@@ -18,6 +18,14 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
       type:
         required: true
 
+### `utilization_policy`
+
+#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
+    overrides:
+      show_root_heading: false
+      type:
+        required: true
+
 ### `resources`
 
 #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
diff --git a/src/dstack/_internal/core/models/profiles.py b/src/dstack/_internal/core/models/profiles.py
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import List, Optional, Union
+from typing import List, Optional, Union, overload
 
 from pydantic import Field, root_validator, validator
 from typing_extensions import Annotated, Literal
@@ -34,6 +34,14 @@ class TerminationPolicy(str, Enum):
     DESTROY_AFTER_IDLE = "destroy-after-idle"
 
 
+@overload
+def parse_duration(v: None) -> None: ...
+
+
+@overload
+def parse_duration(v: Union[int, str]) -> int: ...
+
+
 def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
     if v is None:
         return None
@@ -113,6 +121,8 @@ def _validate_fields(cls, values):
 
 
 class UtilizationPolicy(CoreModel):
+    _min_time_window = "5m"
+
     min_gpu_utilization: Annotated[
         int,
         Field(
@@ -130,12 +140,17 @@ class UtilizationPolicy(CoreModel):
         Field(
             description=(
                 "The time window of metric samples taking into account to measure utilization"
-                " (e.g., `30m`, `1h`)"
+                f" (e.g., `30m`, `1h`). Minimum is `{_min_time_window}`"
             )
         ),
     ]
 
-    _validate_time_window = validator("time_window", pre=True, allow_reuse=True)(parse_duration)
+    @validator("time_window", pre=True)
+    def validate_time_window(cls, v: Union[int, str]) -> int:
+        v = parse_duration(v)
+        if v < parse_duration(cls._min_time_window):
+            raise ValueError(f"Minimum time_window is {cls._min_time_window}")
+        return v
 
 
 class ProfileParams(CoreModel):
diff --git a/src/dstack/_internal/server/services/runs.py b/src/dstack/_internal/server/services/runs.py
@@ -48,6 +48,7 @@
 )
 from dstack._internal.core.services import validate_dstack_resource_name
 from dstack._internal.core.services.diff import diff_models
+from dstack._internal.server import settings
 from dstack._internal.server.db import get_db
 from dstack._internal.server.models import (
     JobModel,
@@ -838,6 +839,14 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
         run_spec.repo_id = DEFAULT_VIRTUAL_REPO_ID
     if run_spec.repo_data is None:
         run_spec.repo_data = VirtualRunRepoData()
+    if (
+        run_spec.merged_profile.utilization_policy is not None
+        and run_spec.merged_profile.utilization_policy.time_window
+        > settings.SERVER_METRICS_TTL_SECONDS
+    ):
+        raise ServerClientError(
+            f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
+        )
 
 
 _UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"]
diff --git a/src/dstack/api/utils.py b/src/dstack/api/utils.py
@@ -2,6 +2,7 @@
 from typing import Optional, Tuple
 
 import yaml
+from pydantic import ValidationError
 
 from dstack._internal.core.errors import ConfigurationError
 from dstack._internal.core.models.configurations import AnyRunConfiguration
@@ -96,6 +97,8 @@ def _load_profile_from_path(profiles_path: Path, profile_name: Optional[str]) ->
             config = ProfilesConfig.parse_obj(yaml.safe_load(f))
     except FileNotFoundError:
         return None
+    except ValidationError as e:
+        raise ConfigurationError(e)
 
     if profile_name is None:
         return config.default()