Skip to content

Commit f6c9429

Browse files
authored
[k8s] add even more debug logs around k8s list_namespaced_pod (#7852)
* add more debug logs around k8s list_namespaced_pod
* more info
* reduce overhead
* label selector
* clean
* only debug
1 parent 369f180 commit f6c9429

File tree

1 file changed

+33
-13
lines changed

1 file changed

+33
-13
lines changed

sky/provision/kubernetes/instance.py

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1691,29 +1691,49 @@ def query_instances(
16911691
context = kubernetes_utils.get_context_from_config(provider_config)
16921692
is_ssh = context.startswith('ssh-') if context else False
16931693
identity = 'SSH Node Pool' if is_ssh else 'Kubernetes cluster'
1694+
label_selector = (f'{constants.TAG_SKYPILOT_CLUSTER_NAME}='
1695+
f'{cluster_name_on_cloud}')
16941696

16951697
# Get all the pods with the label skypilot-cluster-name: <cluster_name>
16961698
try:
1697-
logger.debug(
1698-
f'Querying k8s api for pods in context: {context} and '
1699-
f'namespace: {namespace} with '
1700-
f'`skypilot-cluster-name={cluster_name_on_cloud}` label selector.')
1699+
# log the query parameters we pass to the k8s api
1700+
logger.debug(f'Querying k8s api for pods:\n'
1701+
f'context: {context}\n'
1702+
f'namespace: {namespace}\n'
1703+
f'label selector:`{label_selector}`.')
17011704

1702-
label_selector = (f'{constants.TAG_SKYPILOT_CLUSTER_NAME}='
1703-
f'{cluster_name_on_cloud}')
17041705
response = kubernetes.core_api(context).list_namespaced_pod(
17051706
namespace,
17061707
label_selector=label_selector,
17071708
_request_timeout=kubernetes.API_TIMEOUT)
1709+
1710+
# log PodList response info
1711+
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
1712+
logger.debug(f'k8s api response for `{label_selector}`:\n'
1713+
f'apiVersion={response.api_version}, '
1714+
f'kind={response.kind},\n'
1715+
f'metadata={response.metadata}')
1716+
17081717
pods = response.items
17091718

1710-
# Log response metadata
1711-
# pylint: disable=protected-access
1712-
logger.debug(
1713-
f'Query response for skypilot cluster {cluster_name_on_cloud}: '
1714-
f'resource_version={response.metadata.resource_version}, '
1715-
f'pod_count={len(pods)}, '
1716-
f'continue_token={response.metadata._continue}')
1719+
# log detailed Pod info
1720+
if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
1721+
logger.debug(f'k8s api response for `{label_selector}`: '
1722+
f'len(pods)={len(pods)}')
1723+
for pod in pods:
1724+
logger.debug(f'k8s pod info for `{label_selector}`: '
1725+
f'pod.apiVersion={pod.api_version}, '
1726+
f'pod.kind={pod.kind}, \n'
1727+
f'pod.name={pod.metadata.name}, '
1728+
f'pod.namespace={pod.metadata.namespace}, \n'
1729+
f'pod.labels={pod.metadata.labels}, \n'
1730+
f'pod.annotations={pod.metadata.annotations}, \n'
1731+
'pod.creationTimestamp='
1732+
f'{pod.metadata.creation_timestamp}, '
1733+
'pod.deletionTimestamp='
1734+
f'{pod.metadata.deletion_timestamp}, \n'
1735+
f'pod.status={pod.status}')
1736+
17171737
except kubernetes.max_retry_error():
17181738
with ux_utils.print_exception_no_traceback():
17191739
if is_ssh:

0 commit comments

Comments
 (0)