Skip to content

Commit 542ef36

Browse files
authored
[Jobs][Consolidation] Fix signal file creation when using Postgres (#7717)
* fix * comments & skip warning for pg * format
1 parent 887055e commit 542ef36

File tree

2 files changed

+33
-19
lines changed

2 files changed

+33
-19
lines changed

sky/server/common.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -539,19 +539,27 @@ def _start_api_server(deploy: bool = False,
539539
'is not a local URL')
540540

541541
# Check available memory before starting the server.
542-
avail_mem_size_gb: float = common_utils.get_mem_size_gb()
543-
# pylint: disable=import-outside-toplevel
544-
import sky.jobs.utils as job_utils
545-
max_memory = (server_constants.MIN_AVAIL_MEM_GB_CONSOLIDATION_MODE
546-
if job_utils.is_consolidation_mode(on_api_restart=True)
547-
else server_constants.MIN_AVAIL_MEM_GB)
548-
if avail_mem_size_gb <= max_memory:
549-
logger.warning(
550-
f'{colorama.Fore.YELLOW}Your SkyPilot API server machine only '
551-
f'has {avail_mem_size_gb:.1f}GB memory available. '
552-
f'At least {max_memory}GB is recommended to support higher '
553-
'load with better performance.'
554-
f'{colorama.Style.RESET_ALL}')
542+
# Skip this warning if postgres is used, as:
543+
# 1) that's almost certainly a remote API server;
544+
# 2) the actual consolidation mode config is stashed in the database,
545+
# and the value of `job_utils.is_consolidation_mode` will not be
546+
# the actual value in the db, but only None as in this case, the
547+
# whole YAML config is really just `db: <URI>`.
548+
if skypilot_config.get_nested(('db',), None) is None:
549+
avail_mem_size_gb: float = common_utils.get_mem_size_gb()
550+
# pylint: disable=import-outside-toplevel
551+
import sky.jobs.utils as job_utils
552+
max_memory = (server_constants.MIN_AVAIL_MEM_GB_CONSOLIDATION_MODE
553+
if job_utils.is_consolidation_mode(
554+
on_api_restart=True) else
555+
server_constants.MIN_AVAIL_MEM_GB)
556+
if avail_mem_size_gb <= max_memory:
557+
logger.warning(
558+
f'{colorama.Fore.YELLOW}Your SkyPilot API server machine '
559+
f'only has {avail_mem_size_gb:.1f}GB memory available. '
560+
f'At least {max_memory}GB is recommended to support higher '
561+
'load with better performance.'
562+
f'{colorama.Style.RESET_ALL}')
555563

556564
args = [sys.executable, *API_SERVER_CMD.split()]
557565
if deploy:
@@ -560,8 +568,6 @@ def _start_api_server(deploy: bool = False,
560568
args += [f'--host={host}']
561569
if metrics_port is not None:
562570
args += [f'--metrics-port={metrics_port}']
563-
# Use this argument to disable the internal signal file check.
564-
args += ['--start-with-python']
565571

566572
if foreground:
567573
# Replaces the current process with the API server

sky/server/server.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2108,7 +2108,6 @@ def apply_user_hash(user_hash: str) -> None:
21082108
# Serve metrics on a separate port to isolate it from the application APIs:
21092109
# metrics port will not be exposed to the public network typically.
21102110
parser.add_argument('--metrics-port', default=9090, type=int)
2111-
parser.add_argument('--start-with-python', action='store_true')
21122111
cmd_args = parser.parse_args()
21132112
if cmd_args.port == cmd_args.metrics_port:
21142113
logger.error('port and metrics-port cannot be the same, exiting.')
@@ -2123,9 +2122,18 @@ def apply_user_hash(user_hash: str) -> None:
21232122
logger.error(f'Port {cmd_args.port} is not available, exiting.')
21242123
raise RuntimeError(f'Port {cmd_args.port} is not available')
21252124

2126-
if not cmd_args.start_with_python:
2127-
# Maybe touch the signal file on API server startup.
2128-
managed_job_utils.is_consolidation_mode(on_api_restart=True)
2125+
# Maybe touch the signal file on API server startup. Do it again here even
2126+
# if we already touched it in the sky/server/common.py::_start_api_server.
2127+
# This is because the sky/server/common.py::_start_api_server function call
2128+
# is running outside the skypilot API server process tree. The process tree
2129+
# starts within that function (see the `subprocess.Popen` call in
2130+
# sky/server/common.py::_start_api_server). When pg is used, the
2131+
# _start_api_server function will not load the config file from db, which
2132+
# will ignore the consolidation mode config. Here, inside the process tree,
2133+
# we already reload the config as a server (with env var _start_api_server),
2134+
# so we will respect the consolidation mode config.
2135+
# Refers to #7717 for more details.
2136+
managed_job_utils.is_consolidation_mode(on_api_restart=True)
21292137

21302138
# Show the privacy policy if it is not already shown. We place it here so
21312139
# that it is shown only when the API server is started.

0 commit comments

Comments
 (0)