[core] reduce database query frequency for /api/stream (#7569)

cg505 · aylei · web-flow · commit 72125e8f2e37 · 2025-10-11T01:23:59.000+08:00
* [core] reduce database query frequency for /api/stream

* Refine

Signed-off-by: Aylei <rayingecho@gmail.com>

* Fix bug

Signed-off-by: Aylei <rayingecho@gmail.com>

* Fix get_request_async called in coroutine polling

Signed-off-by: Aylei <rayingecho@gmail.com>

* Fix unit test (UT)

Signed-off-by: Aylei <rayingecho@gmail.com>

---------

Signed-off-by: Aylei <rayingecho@gmail.com>
Co-authored-by: Aylei <rayingecho@gmail.com>
diff --git a/sky/jobs/server/server.py b/sky/jobs/server/server.py
@@ -116,7 +116,7 @@ async def logs(
         # Cancel the coroutine after the request is done or client disconnects
         background_tasks.add_task(task.cancel)
 
-    return stream_utils.stream_response(
+    return stream_utils.stream_response_for_long_request(
         request_id=request_task.request_id,
         logs_path=request_task.log_path,
         background_tasks=background_tasks,
@@ -201,7 +201,7 @@ async def pool_tail_logs(
 
     request_task = api_requests.get_request(request.state.request_id)
 
-    return stream_utils.stream_response(
+    return stream_utils.stream_response_for_long_request(
         request_id=request_task.request_id,
         logs_path=request_task.log_path,
         background_tasks=background_tasks,
diff --git a/sky/serve/server/server.py b/sky/serve/server/server.py
@@ -109,7 +109,7 @@ async def tail_logs(
     task = executor.execute_request_in_coroutine(request_task)
     # Cancel the coroutine after the request is done or client disconnects
     background_tasks.add_task(task.cancel)
-    return stream_utils.stream_response(
+    return stream_utils.stream_response_for_long_request(
         request_id=request_task.request_id,
         logs_path=request_task.log_path,
         background_tasks=background_tasks,
diff --git a/sky/server/requests/executor.py b/sky/server/requests/executor.py
@@ -580,11 +580,11 @@ async def _execute_request_coroutine(request: api_requests.Request):
                                                   **request_body.to_kwargs())
 
     async def poll_task(request_id: str) -> bool:
-        request = await api_requests.get_request_async(request_id)
-        if request is None:
+        req_status = await api_requests.get_request_status_async(request_id)
+        if req_status is None:
             raise RuntimeError('Request not found')
 
-        if request.status == api_requests.RequestStatus.CANCELLED:
+        if req_status.status == api_requests.RequestStatus.CANCELLED:
             ctx.cancel()
             return True
 
diff --git a/sky/server/server.py b/sky/server/server.py
@@ -1243,7 +1243,7 @@ async def logs(
     background_tasks.add_task(task.cancel)
     # TODO(zhwu): This makes viewing logs in browser impossible. We should adopt
     # the same approach as /stream.
-    return stream_utils.stream_response(
+    return stream_utils.stream_response_for_long_request(
         request_id=request.state.request_id,
         logs_path=request_task.log_path,
         background_tasks=background_tasks,
@@ -1539,6 +1539,7 @@ async def stream(
                 'X-Accel-Buffering': 'no'
             })
 
+    polling_interval = stream_utils.DEFAULT_POLL_INTERVAL
     # Original plain text streaming logic
     if request_id is not None:
         request_task = await requests_lib.get_request_async(request_id)
@@ -1553,6 +1554,8 @@ async def stream(
             raise fastapi.HTTPException(
                 status_code=404,
                 detail=f'Log of request {request_id!r} has been deleted')
+        if request_task.schedule_type == requests_lib.ScheduleType.LONG:
+            polling_interval = stream_utils.LONG_REQUEST_POLL_INTERVAL
     else:
         assert log_path is not None, (request_id, log_path)
         if log_path == constants.API_SERVER_LOGS:
@@ -1600,7 +1603,8 @@ async def stream(
                                           log_path_to_stream,
                                           plain_logs=format == 'plain',
                                           tail=tail,
-                                          follow=follow),
+                                          follow=follow,
+                                          polling_interval=polling_interval),
         media_type='text/plain',
         headers=headers,
     )
diff --git a/sky/server/stream_utils.py b/sky/server/stream_utils.py
@@ -11,6 +11,7 @@
 from sky import global_user_state
 from sky import sky_logging
 from sky.server.requests import requests as requests_lib
+from sky.utils import common_utils
 from sky.utils import message_utils
 from sky.utils import rich_utils
 from sky.utils import status_lib
@@ -24,7 +25,9 @@
 _BUFFER_SIZE = 8 * 1024  # 8KB
 _BUFFER_TIMEOUT = 0.02  # 20ms
 _HEARTBEAT_INTERVAL = 30
-_CLUSTER_STATUS_INTERVAL = 1
+
+LONG_REQUEST_POLL_INTERVAL = 1
+DEFAULT_POLL_INTERVAL = 0.1
 
 
 async def _yield_log_file_with_payloads_skipped(
@@ -41,12 +44,14 @@ async def _yield_log_file_with_payloads_skipped(
 
 
 async def log_streamer(
-        request_id: Optional[str],
-        log_path: pathlib.Path,
-        plain_logs: bool = False,
-        tail: Optional[int] = None,
-        follow: bool = True,
-        cluster_name: Optional[str] = None) -> AsyncGenerator[str, None]:
+    request_id: Optional[str],
+    log_path: pathlib.Path,
+    plain_logs: bool = False,
+    tail: Optional[int] = None,
+    follow: bool = True,
+    cluster_name: Optional[str] = None,
+    polling_interval: float = DEFAULT_POLL_INTERVAL
+) -> AsyncGenerator[str, None]:
     """Streams the logs of a request.
 
     Args:
@@ -84,6 +89,11 @@ async def log_streamer(
                        f'scheduled: {request_id}')
         req_status = request_task.status
         req_msg = request_task.status_msg
+        # Slowly back off the database polling up to every 1 second, to avoid
+        # overloading the CPU and DB.
+        backoff = common_utils.Backoff(initial_backoff=polling_interval,
+                                       max_backoff_factor=10,
+                                       multiplier=1.2)
         while req_status < requests_lib.RequestStatus.RUNNING:
             if req_msg is not None:
                 waiting_msg = request_task.status_msg
@@ -99,7 +109,7 @@ async def log_streamer(
             # TODO(aylei): we should use a better mechanism to avoid busy
             # polling the DB, which can be a bottleneck for high-concurrency
             # requests.
-            await asyncio.sleep(0.1)
+            await asyncio.sleep(backoff.current_backoff())
             status_with_msg = await requests_lib.get_request_status_async(
                 request_id, include_msg=True)
             req_status = status_with_msg.status
@@ -111,17 +121,20 @@ async def log_streamer(
 
     async with aiofiles.open(log_path, 'rb') as f:
         async for chunk in _tail_log_file(f, request_id, plain_logs, tail,
-                                          follow, cluster_name):
+                                          follow, cluster_name,
+                                          polling_interval):
             yield chunk
 
 
 async def _tail_log_file(
-        f: aiofiles.threadpool.binary.AsyncBufferedReader,
-        request_id: Optional[str] = None,
-        plain_logs: bool = False,
-        tail: Optional[int] = None,
-        follow: bool = True,
-        cluster_name: Optional[str] = None) -> AsyncGenerator[str, None]:
+    f: aiofiles.threadpool.binary.AsyncBufferedReader,
+    request_id: Optional[str] = None,
+    plain_logs: bool = False,
+    tail: Optional[int] = None,
+    follow: bool = True,
+    cluster_name: Optional[str] = None,
+    polling_interval: float = DEFAULT_POLL_INTERVAL
+) -> AsyncGenerator[str, None]:
     """Tail the opened log file, buffer the lines and flush in chunks."""
 
     if tail is not None:
@@ -137,7 +150,7 @@ async def _tail_log_file(
             yield line_str
 
     last_heartbeat_time = asyncio.get_event_loop().time()
-    last_cluster_status_check_time = asyncio.get_event_loop().time()
+    last_status_check_time = asyncio.get_event_loop().time()
 
     # Buffer the lines in memory and flush them in chunks to improve log
     # tailing throughput.
@@ -167,7 +180,17 @@ async def flush_buffer() -> AsyncGenerator[str, None]:
 
         line: Optional[bytes] = await f.readline()
         if not line:
-            if request_id is not None:
+            # Avoid checking the status too frequently to avoid overloading the
+            # DB.
+            should_check_status = (current_time -
+                                   last_status_check_time) >= polling_interval
+            if not follow:
+                # We will only hit this path once, but we should make sure to
+                # check the status so that we display the final request status
+                # if the request is complete.
+                should_check_status = True
+            if request_id is not None and should_check_status:
+                last_status_check_time = current_time
                 req_status = await requests_lib.get_request_status_async(
                     request_id)
                 if req_status.status > requests_lib.RequestStatus.RUNNING:
@@ -185,20 +208,19 @@ async def flush_buffer() -> AsyncGenerator[str, None]:
                                 ' cancelled\n')
                     break
             if not follow:
+                # The below checks (cluster status, heartbeat) are not needed
+                # for non-follow logs.
                 break
             # Provision logs pass in cluster_name, check cluster status
-            # periodically to see if provisioning is done. We only
-            # check once a second to avoid overloading the DB.
-            check_status = (current_time - last_cluster_status_check_time
-                           ) >= _CLUSTER_STATUS_INTERVAL
-            if cluster_name is not None and check_status:
+            # periodically to see if provisioning is done.
+            if cluster_name is not None and should_check_status:
+                last_status_check_time = current_time
                 cluster_record = await (
                     global_user_state.get_status_from_cluster_name_async(
                         cluster_name))
                 if (cluster_record is None or
                         cluster_record != status_lib.ClusterStatus.INIT):
                     break
-                last_cluster_status_check_time = current_time
             if current_time - last_heartbeat_time >= _HEARTBEAT_INTERVAL:
                 # Currently just used to keep the connection busy, refer to
                 # https://github.com/skypilot-org/skypilot/issues/5750 for
@@ -234,9 +256,22 @@ async def flush_buffer() -> AsyncGenerator[str, None]:
         yield chunk
 
 
+def stream_response_for_long_request(
+    request_id: str,
+    logs_path: pathlib.Path,
+    background_tasks: fastapi.BackgroundTasks,
+) -> fastapi.responses.StreamingResponse:
+    return stream_response(request_id,
+                           logs_path,
+                           background_tasks,
+                           polling_interval=LONG_REQUEST_POLL_INTERVAL)
+
+
 def stream_response(
-    request_id: str, logs_path: pathlib.Path,
-    background_tasks: fastapi.BackgroundTasks
+    request_id: str,
+    logs_path: pathlib.Path,
+    background_tasks: fastapi.BackgroundTasks,
+    polling_interval: float = DEFAULT_POLL_INTERVAL
 ) -> fastapi.responses.StreamingResponse:
 
     async def on_disconnect():
@@ -249,7 +284,7 @@ async def on_disconnect():
     background_tasks.add_task(on_disconnect)
 
     return fastapi.responses.StreamingResponse(
-        log_streamer(request_id, logs_path),
+        log_streamer(request_id, logs_path, polling_interval=polling_interval),
         media_type='text/plain',
         headers={
             'Cache-Control': 'no-cache, no-transform',
diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py
@@ -265,13 +265,16 @@ def get_global_job_id(job_timestamp: str,
 
 class Backoff:
     """Exponential backoff with jittering."""
-    MULTIPLIER = 1.6
     JITTER = 0.4
 
-    def __init__(self, initial_backoff: float = 5, max_backoff_factor: int = 5):
+    def __init__(self,
+                 initial_backoff: float = 5,
+                 max_backoff_factor: int = 5,
+                 multiplier: float = 1.6):
         self._initial = True
         self._backoff = 0.0
         self._initial_backoff = initial_backoff
+        self._multiplier = multiplier
         self._max_backoff = max_backoff_factor * self._initial_backoff
 
     # https://github.com/grpc/grpc/blob/2d4f3c56001cd1e1f85734b2f7c5ce5f2797c38a/doc/connection-backoff.md
@@ -283,7 +286,7 @@ def current_backoff(self) -> float:
             self._initial = False
             self._backoff = min(self._initial_backoff, self._max_backoff)
         else:
-            self._backoff = min(self._backoff * self.MULTIPLIER,
+            self._backoff = min(self._backoff * self._multiplier,
                                 self._max_backoff)
         self._backoff += random.uniform(-self.JITTER * self._backoff,
                                         self.JITTER * self._backoff)
diff --git a/tests/unit_tests/test_sky/server/test_server.py b/tests/unit_tests/test_sky/server/test_server.py
@@ -210,10 +210,10 @@ def slow_execute(*args, **kwargs):
         # Verify the executor calls
         mock_prepare.assert_called_once()
         mock_execute.assert_called_once_with(mock_request_task)
-        mock_stream.assert_called_once_with(
-            request_id=mock.ANY,
-            logs_path=mock_request_task.log_path,
-            background_tasks=mock.ANY)
+        mock_stream.assert_called_once_with(mock.ANY,
+                                            mock_request_task.log_path,
+                                            mock.ANY,
+                                            polling_interval=1)
 
 
 @mock.patch('sky.utils.context_utils.hijack_sys_attrs')