Skip to content

Commit 03ceb16

Browse files
authored
Set lower and upper limits of utilization_policy.time_window (#2386)
Fixes: #2384
1 parent 9d41682 commit 03ceb16

File tree

6 files changed

+64
-13
lines changed

6 files changed

+64
-13
lines changed

docs/docs/reference/dstack.yml/dev-environment.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ The `dev-environment` configuration type allows running [dev environments](../..
1818
type:
1919
required: true
2020

21+
### `utilization_policy`
22+
23+
#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
24+
overrides:
25+
show_root_heading: false
26+
type:
27+
required: true
28+
2129
### `resources`
2230

2331
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema

docs/docs/reference/dstack.yml/service.md

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,31 @@ The `service` configuration type allows running [services](../../concepts/servic
2424

2525
> TGI provides an OpenAI-compatible API starting with version 1.4.0,
2626
so models served by TGI can be defined with `format: openai` too.
27-
27+
2828
#SCHEMA# dstack.api.TGIChatModel
2929
overrides:
3030
show_root_heading: false
3131
type:
3232
required: true
3333

3434
??? info "Chat template"
35-
35+
3636
By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating)
3737
from the model's repository. If it is not present there, manual configuration is required.
38-
38+
3939
```yaml
4040
type: service
41-
41+
4242
image: ghcr.io/huggingface/text-generation-inference:latest
4343
env:
4444
- MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ
4545
commands:
4646
- text-generation-launcher --port 8000 --trust-remote-code --quantize gptq
4747
port: 8000
48-
48+
4949
resources:
5050
gpu: 80GB
51-
51+
5252
# Enable the OpenAI-compatible endpoint
5353
model:
5454
type: chat
@@ -57,13 +57,13 @@ The `service` configuration type allows running [services](../../concepts/servic
5757
chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' </s>' }}{% endif %}{% endfor %}"
5858
eos_token: "</s>"
5959
```
60-
60+
6161
Please note that model mapping is an experimental feature with the following limitations:
62-
62+
6363
1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself.
6464
2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template).
65-
66-
If you encounter any other issues, please make sure to file a
65+
66+
If you encounter any other issues, please make sure to file a
6767
[GitHub issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/new/choose){:target="_blank"}.
6868

6969
### `scaling`
@@ -80,6 +80,14 @@ The `service` configuration type allows running [services](../../concepts/servic
8080
overrides:
8181
show_root_heading: false
8282

83+
### `utilization_policy`
84+
85+
#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
86+
overrides:
87+
show_root_heading: false
88+
type:
89+
required: true
90+
8391
### `resources`
8492

8593
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema

docs/docs/reference/dstack.yml/task.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
1818
type:
1919
required: true
2020

21+
### `utilization_policy`
22+
23+
#SCHEMA# dstack._internal.core.models.profiles.UtilizationPolicy
24+
overrides:
25+
show_root_heading: false
26+
type:
27+
required: true
28+
2129
### `resources`
2230

2331
#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema

src/dstack/_internal/core/models/profiles.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from enum import Enum
2-
from typing import List, Optional, Union
2+
from typing import List, Optional, Union, overload
33

44
from pydantic import Field, root_validator, validator
55
from typing_extensions import Annotated, Literal
@@ -34,6 +34,14 @@ class TerminationPolicy(str, Enum):
3434
DESTROY_AFTER_IDLE = "destroy-after-idle"
3535

3636

37+
@overload
38+
def parse_duration(v: None) -> None: ...
39+
40+
41+
@overload
42+
def parse_duration(v: Union[int, str]) -> int: ...
43+
44+
3745
def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
3846
if v is None:
3947
return None
@@ -113,6 +121,8 @@ def _validate_fields(cls, values):
113121

114122

115123
class UtilizationPolicy(CoreModel):
124+
_min_time_window = "5m"
125+
116126
min_gpu_utilization: Annotated[
117127
int,
118128
Field(
@@ -130,12 +140,17 @@ class UtilizationPolicy(CoreModel):
130140
Field(
131141
description=(
132142
"The time window of metric samples taking into account to measure utilization"
133-
" (e.g., `30m`, `1h`)"
143+
f" (e.g., `30m`, `1h`). Minimum is `{_min_time_window}`"
134144
)
135145
),
136146
]
137147

138-
_validate_time_window = validator("time_window", pre=True, allow_reuse=True)(parse_duration)
148+
@validator("time_window", pre=True)
149+
def validate_time_window(cls, v: Union[int, str]) -> int:
150+
v = parse_duration(v)
151+
if v < parse_duration(cls._min_time_window):
152+
raise ValueError(f"Minimum time_window is {cls._min_time_window}")
153+
return v
139154

140155

141156
class ProfileParams(CoreModel):

src/dstack/_internal/server/services/runs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
)
4949
from dstack._internal.core.services import validate_dstack_resource_name
5050
from dstack._internal.core.services.diff import diff_models
51+
from dstack._internal.server import settings
5152
from dstack._internal.server.db import get_db
5253
from dstack._internal.server.models import (
5354
JobModel,
@@ -838,6 +839,14 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
838839
run_spec.repo_id = DEFAULT_VIRTUAL_REPO_ID
839840
if run_spec.repo_data is None:
840841
run_spec.repo_data = VirtualRunRepoData()
842+
if (
843+
run_spec.merged_profile.utilization_policy is not None
844+
and run_spec.merged_profile.utilization_policy.time_window
845+
> settings.SERVER_METRICS_TTL_SECONDS
846+
):
847+
raise ServerClientError(
848+
f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
849+
)
841850

842851

843852
_UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"]

src/dstack/api/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Optional, Tuple
33

44
import yaml
5+
from pydantic import ValidationError
56

67
from dstack._internal.core.errors import ConfigurationError
78
from dstack._internal.core.models.configurations import AnyRunConfiguration
@@ -96,6 +97,8 @@ def _load_profile_from_path(profiles_path: Path, profile_name: Optional[str]) ->
9697
config = ProfilesConfig.parse_obj(yaml.safe_load(f))
9798
except FileNotFoundError:
9899
return None
100+
except ValidationError as e:
101+
raise ConfigurationError(e)
99102

100103
if profile_name is None:
101104
return config.default()

0 commit comments

Comments
 (0)