Skip to content

Commit 348b19c

Browse files
authored
fix(sqllab): validate results backend writes and enhance 410 diagnostics (#36222)
1 parent 979d385 commit 348b19c

File tree

3 files changed

+208
-19
lines changed

3 files changed

+208
-19
lines changed

superset/commands/sql_lab/results.py

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -59,40 +59,65 @@ def validate(self) -> None:
5959
)
6060
)
6161

62-
read_from_results_backend_start = now_as_float()
63-
self._blob = results_backend.get(self._key)
64-
app.config["STATS_LOGGER"].timing(
65-
"sqllab.query.results_backend_read",
66-
now_as_float() - read_from_results_backend_start,
67-
)
62+
stats_logger = app.config["STATS_LOGGER"]
6863

69-
if not self._blob:
64+
# Check if query exists in database first (fast, avoids unnecessary S3 call)
65+
self._query = (
66+
db.session.query(Query).filter_by(results_key=self._key).one_or_none()
67+
)
68+
if self._query is None:
69+
logger.warning(
70+
"404 Error - Query not found in database for key: %s",
71+
self._key,
72+
)
73+
stats_logger.incr("sqllab.results_backend.404_query_not_found")
7074
raise SupersetErrorException(
7175
SupersetError(
7276
message=__(
73-
"Data could not be retrieved from the results backend. You "
74-
"need to re-run the original query."
77+
"The query associated with these results could not be found. "
78+
"You need to re-run the original query."
7579
),
7680
error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
7781
level=ErrorLevel.ERROR,
7882
),
79-
status=410,
83+
status=404,
8084
)
8185

82-
self._query = (
83-
db.session.query(Query).filter_by(results_key=self._key).one_or_none()
86+
# Now fetch results from backend (query exists, so this is a valid request)
87+
read_from_results_backend_start = now_as_float()
88+
self._blob = results_backend.get(self._key)
89+
stats_logger.timing(
90+
"sqllab.query.results_backend_read",
91+
now_as_float() - read_from_results_backend_start,
8492
)
85-
if self._query is None:
93+
94+
if not self._blob:
95+
# Query exists in DB but results not in S3 - enhanced diagnostics
96+
query_age_seconds = now_as_float() - (
97+
self._query.end_time if self._query.end_time else now_as_float()
98+
)
99+
logger.warning(
100+
"410 Error - Query exists in DB but results not in results backend"
101+
" Query ID: %s, Status: %s, Age: %.2f seconds, "
102+
"End time: %s, Results key: %s",
103+
self._query.id,
104+
self._query.status,
105+
query_age_seconds,
106+
self._query.end_time,
107+
self._key,
108+
)
109+
stats_logger.incr("sqllab.results_backend.410_results_missing")
110+
86111
raise SupersetErrorException(
87112
SupersetError(
88113
message=__(
89-
"The query associated with these results could not be found. "
90-
"You need to re-run the original query."
114+
"Data could not be retrieved from the results backend. You "
115+
"need to re-run the original query."
91116
),
92117
error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
93118
level=ErrorLevel.ERROR,
94119
),
95-
status=404,
120+
status=410,
96121
)
97122

98123
def run(

superset/sql_lab.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -582,10 +582,50 @@ def execute_sql_statements( # noqa: C901
582582
"*** serialized payload size: %i", getsizeof(serialized_payload)
583583
)
584584
logger.debug("*** compressed payload size: %i", getsizeof(compressed))
585-
results_backend.set(key, compressed, cache_timeout)
586-
query.results_key = key
587585

588-
query.status = QueryStatus.SUCCESS
586+
# Store results in backend and check if write succeeded
587+
write_success = results_backend.set(key, compressed, cache_timeout)
588+
if not write_success:
589+
# Backend write failed - log error and don't set results_key
590+
logger.error(
591+
"Query %s: Failed to store results in backend, key: %s",
592+
str(query_id),
593+
key,
594+
)
595+
stats_logger.incr("sqllab.results_backend.write_failure")
596+
# Don't set results_key to prevent 410 errors when fetching
597+
query.results_key = None
598+
599+
# For async queries (not returning results inline), mark as FAILED
600+
# because results are inaccessible to the user
601+
if not return_results:
602+
query.status = QueryStatus.FAILED
603+
query.error_message = (
604+
"Failed to store query results in the results backend. "
605+
"Please try again or contact your administrator."
606+
)
607+
db.session.commit()
608+
raise SupersetErrorException(
609+
SupersetError(
610+
message=__(
611+
"Failed to store query results. Please try again."
612+
),
613+
error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
614+
level=ErrorLevel.ERROR,
615+
)
616+
)
617+
else:
618+
# Write succeeded - set results_key in database
619+
query.results_key = key
620+
logger.info(
621+
"Query %s: Successfully stored results in backend, key: %s",
622+
str(query_id),
623+
key,
624+
)
625+
626+
# Only set SUCCESS if we didn't already set FAILED above
627+
if query.status != QueryStatus.FAILED:
628+
query.status = QueryStatus.SUCCESS
589629
db.session.commit()
590630

591631
if return_results:

tests/integration_tests/sql_lab/test_execute_sql_statements.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
from unittest.mock import MagicMock, patch
18+
1719
from flask import current_app
1820

21+
from superset import db
1922
from superset.common.db_query_status import QueryStatus
2023
from superset.models.core import Database
2124
from superset.models.sql_lab import Query
@@ -54,3 +57,124 @@ def test_non_async_execute(non_async_example_db: Database, example_query: Query)
5457

5558
if non_async_example_db.db_engine_spec.engine_name == "hive":
5659
assert example_query.tracking_url_raw
60+
61+
62+
@patch("superset.sql_lab.results_backend")
63+
def test_results_backend_write_failure(
64+
mock_results_backend: MagicMock,
65+
async_example_db: Database,
66+
example_query: Query,
67+
):
68+
"""Test async query marked FAILED when results_backend.set() False"""
69+
import pytest
70+
71+
from superset.exceptions import SupersetErrorException
72+
73+
# Mock results backend to simulate write failure
74+
mock_results_backend.set.return_value = False
75+
76+
# Execute query with store_results=True, return_results=False (async mode)
77+
# Should raise exception because results can't be stored
78+
with pytest.raises(SupersetErrorException) as exc_info:
79+
execute_sql_statements(
80+
example_query.id,
81+
"select 1 as foo;",
82+
store_results=True,
83+
return_results=False,
84+
start_time=now_as_float(),
85+
expand_data=False,
86+
log_params=dict(), # noqa: C408
87+
)
88+
89+
# Verify exception message
90+
assert "Failed to store query results" in str(exc_info.value.error.message)
91+
92+
# Refresh query from database to get updated state
93+
db.session.refresh(example_query)
94+
95+
# Assert query status is FAILED (results inaccessible for async queries)
96+
assert example_query.status == QueryStatus.FAILED
97+
98+
# Assert results_key is None (because backend write failed)
99+
assert example_query.results_key is None
100+
101+
# Assert error message is set
102+
assert "Failed to store query results" in example_query.error_message
103+
104+
# Assert backend.set() was called
105+
assert mock_results_backend.set.called
106+
107+
108+
@patch("superset.sql_lab.results_backend")
109+
def test_results_backend_write_success(
110+
mock_results_backend: MagicMock,
111+
async_example_db: Database,
112+
example_query: Query,
113+
):
114+
"""Test that query.results_key is set when results_backend.set() True"""
115+
# Mock results backend to simulate successful write
116+
mock_results_backend.set.return_value = True
117+
118+
# Execute query with store_results=True (async mode)
119+
execute_sql_statements(
120+
example_query.id,
121+
"select 1 as foo;",
122+
store_results=True,
123+
return_results=False,
124+
start_time=now_as_float(),
125+
expand_data=False,
126+
log_params=dict(), # noqa: C408
127+
)
128+
129+
# Refresh query from database to get updated state
130+
db.session.refresh(example_query)
131+
132+
# Assert query status is SUCCESS
133+
assert example_query.status == QueryStatus.SUCCESS
134+
135+
# Assert results_key is set (UUID format)
136+
assert example_query.results_key is not None
137+
assert len(example_query.results_key) == 36 # UUID length with dashes
138+
139+
# Assert backend.set() was called
140+
assert mock_results_backend.set.called
141+
142+
143+
@patch("superset.sql_lab.results_backend")
144+
def test_results_backend_write_failure_sync_mode(
145+
mock_results_backend: MagicMock,
146+
non_async_example_db: Database,
147+
example_query: Query,
148+
):
149+
"""Test sync query SUCCESS when cache write fails (results inline)"""
150+
# Mock results backend to simulate write failure
151+
mock_results_backend.set.return_value = False
152+
153+
# Execute query with return_results=True (sync mode - results returned inline)
154+
result = execute_sql_statements(
155+
example_query.id,
156+
"select 1 as foo;",
157+
store_results=True,
158+
return_results=True,
159+
start_time=now_as_float(),
160+
expand_data=True,
161+
log_params=dict(), # noqa: C408
162+
)
163+
164+
# Should return results inline even when cache write fails
165+
assert result
166+
assert result["query_id"] == example_query.id
167+
assert result["status"] == QueryStatus.SUCCESS
168+
assert result["data"] == [{"foo": 1}]
169+
170+
# Refresh query from database to get updated state
171+
db.session.refresh(example_query)
172+
173+
# Assert query status is SUCCESS (results were returned inline)
174+
assert example_query.status == QueryStatus.SUCCESS
175+
176+
# Assert results_key is None (because backend write failed)
177+
assert example_query.results_key is None
178+
179+
# Assert backend.set() was called
180+
assert mock_results_backend.set.called

0 commit comments

Comments
 (0)