fix(SQL Lab): hang when result set size is too big (#30522)
Co-authored-by: aadhikari <aadhikari@apple.com> Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
parent
0e9c0f621a
commit
6ede3271ff
|
|
@ -116,7 +116,8 @@
|
|||
"GENERIC_BACKEND_ERROR",
|
||||
"INVALID_PAYLOAD_FORMAT_ERROR",
|
||||
"INVALID_PAYLOAD_SCHEMA_ERROR",
|
||||
"REPORT_NOTIFICATION_ERROR"
|
||||
"REPORT_NOTIFICATION_ERROR",
|
||||
"RESULT_TOO_LARGE_ERROR"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -227,6 +227,7 @@ export const ErrorTypeEnum = {
|
|||
ASYNC_WORKERS_ERROR: 'ASYNC_WORKERS_ERROR',
|
||||
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: 'ADHOC_SUBQUERY_NOT_ALLOWED_ERROR',
|
||||
INVALID_SQL_ERROR: 'INVALID_SQL_ERROR',
|
||||
RESULT_TOO_LARGE_ERROR: 'RESULT_TOO_LARGE_ERROR',
|
||||
|
||||
// Generic errors
|
||||
GENERIC_COMMAND_ERROR: 'GENERIC_COMMAND_ERROR',
|
||||
|
|
|
|||
|
|
@ -159,5 +159,9 @@ export default function setupErrorMessages() {
|
|||
ErrorTypeEnum.INVALID_SQL_ERROR,
|
||||
InvalidSQLErrorMessage,
|
||||
);
|
||||
errorMessageComponentRegistry.registerValue(
|
||||
ErrorTypeEnum.RESULT_TOO_LARGE_ERROR,
|
||||
DatabaseErrorMessage,
|
||||
);
|
||||
setupErrorMessagesExtra();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -960,6 +960,9 @@ SUPERSET_META_DB_LIMIT: int | None = 1000
|
|||
SQLLAB_SAVE_WARNING_MESSAGE = None
|
||||
SQLLAB_SCHEDULE_WARNING_MESSAGE = None
|
||||
|
||||
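# Maximum size (in MB) of a serialized SQL Lab result payload. Larger results raise
# RESULT_TOO_LARGE_ERROR instead of being sent to the browser, which prevents
# browser hangs. None (the default) disables the check.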
|
||||
SQLLAB_PAYLOAD_MAX_MB = None
|
||||
|
||||
# Force refresh while auto-refresh in dashboard
|
||||
DASHBOARD_AUTO_REFRESH_MODE: Literal["fetch", "force"] = "force"
|
||||
# Dashboard auto refresh intervals
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ class SupersetErrorType(StrEnum):
|
|||
ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
|
||||
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"
|
||||
INVALID_SQL_ERROR = "INVALID_SQL_ERROR"
|
||||
RESULT_TOO_LARGE_ERROR = "RESULT_TOO_LARGE_ERROR"
|
||||
|
||||
# Generic errors
|
||||
GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
|
||||
|
|
@ -151,6 +152,7 @@ ISSUE_CODES = {
|
|||
1036: _("The database was deleted."),
|
||||
1037: _("Custom SQL fields cannot contain sub-queries."),
|
||||
1040: _("The submitted payload failed validation."),
|
||||
1041: _("The result size exceeds the allowed limit."),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -190,6 +192,7 @@ ERROR_TYPES_TO_ISSUE_CODES_MAPPING = {
|
|||
SupersetErrorType.DATABASE_NOT_FOUND_ERROR: [1011, 1036],
|
||||
SupersetErrorType.CONNECTION_DATABASE_TIMEOUT: [1001, 1009],
|
||||
SupersetErrorType.MARSHMALLOW_ERROR: [1040],
|
||||
SupersetErrorType.RESULT_TOO_LARGE_ERROR: [1041],
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
# pylint: disable=consider-using-transaction
|
||||
import dataclasses
|
||||
import logging
|
||||
import sys
|
||||
import uuid
|
||||
from contextlib import closing
|
||||
from datetime import datetime
|
||||
|
|
@ -78,6 +79,7 @@ SQL_MAX_ROW = config["SQL_MAX_ROW"]
|
|||
SQLLAB_CTAS_NO_LIMIT = config["SQLLAB_CTAS_NO_LIMIT"]
|
||||
log_query = config["QUERY_LOGGER"]
|
||||
logger = logging.getLogger(__name__)
|
||||
BYTES_IN_MB = 1024 * 1024
|
||||
|
||||
|
||||
class SqlLabException(Exception):
|
||||
|
|
@ -531,6 +533,7 @@ def execute_sql_statements(
|
|||
log_params,
|
||||
apply_ctas,
|
||||
)
|
||||
|
||||
except SqlLabQueryStoppedException:
|
||||
payload.update({"status": QueryStatus.STOPPED})
|
||||
return payload
|
||||
|
|
@ -601,6 +604,22 @@ def execute_sql_statements(
|
|||
serialized_payload = _serialize_payload(
|
||||
payload, cast(bool, results_backend_use_msgpack)
|
||||
)
|
||||
|
||||
# Check the size of the serialized payload
|
||||
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
|
||||
serialized_payload_size = sys.getsizeof(serialized_payload)
|
||||
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
|
||||
|
||||
if serialized_payload_size > max_bytes:
|
||||
logger.info("Result size exceeds the allowed limit.")
|
||||
raise SupersetErrorException(
|
||||
SupersetError(
|
||||
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
|
||||
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
|
||||
level=ErrorLevel.ERROR,
|
||||
)
|
||||
)
|
||||
|
||||
cache_timeout = database.cache_timeout
|
||||
if cache_timeout is None:
|
||||
cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]
|
||||
|
|
@ -635,6 +654,23 @@ def execute_sql_statements(
|
|||
"expanded_columns": expanded_columns,
|
||||
}
|
||||
)
|
||||
# Check the size of the serialized payload (opt-in logic for return_results)
|
||||
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
|
||||
serialized_payload = _serialize_payload(
|
||||
payload, cast(bool, results_backend_use_msgpack)
|
||||
)
|
||||
serialized_payload_size = sys.getsizeof(serialized_payload)
|
||||
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
|
||||
|
||||
if serialized_payload_size > max_bytes:
|
||||
logger.info("Result size exceeds the allowed limit.")
|
||||
raise SupersetErrorException(
|
||||
SupersetError(
|
||||
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
|
||||
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
|
||||
level=ErrorLevel.ERROR,
|
||||
)
|
||||
)
|
||||
return payload
|
||||
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -17,8 +17,10 @@
|
|||
# pylint: disable=import-outside-toplevel, invalid-name, unused-argument, too-many-locals
|
||||
|
||||
import json
|
||||
from unittest import mock
|
||||
from uuid import UUID
|
||||
|
||||
import pytest
|
||||
import sqlparse
|
||||
from freezegun import freeze_time
|
||||
from pytest_mock import MockerFixture
|
||||
|
|
@ -27,9 +29,9 @@ from sqlalchemy.orm.session import Session
|
|||
from superset import db
|
||||
from superset.common.db_query_status import QueryStatus
|
||||
from superset.errors import ErrorLevel, SupersetErrorType
|
||||
from superset.exceptions import OAuth2Error
|
||||
from superset.exceptions import OAuth2Error, SupersetErrorException
|
||||
from superset.models.core import Database
|
||||
from superset.sql_lab import get_sql_results
|
||||
from superset.sql_lab import execute_sql_statements, get_sql_results
|
||||
from superset.utils.core import override_user
|
||||
from tests.unit_tests.models.core_test import oauth2_client_info
|
||||
|
||||
|
|
@ -125,6 +127,115 @@ def test_execute_sql_statement_with_rls(
|
|||
SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec)
|
||||
|
||||
|
||||
@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Payload limit used by this test, in MB
)
def test_execute_sql_statement_exceeds_payload_limit(mocker: MockerFixture) -> None:
    """
    Test for `execute_sql_statements` when the result payload size exceeds the limit.

    `SQLLAB_PAYLOAD_MAX_MB` is patched to 50 and `sys.getsizeof` is patched to
    report 100 MB, so the call is expected to raise `SupersetErrorException`.
    """

    # Mock the query object and database
    query = mocker.MagicMock()
    query.limit = 1
    query.database = mocker.MagicMock()
    query.database.db_engine_spec.is_select_query.return_value = True
    query.database.cache_timeout = 100
    query.status = "RUNNING"
    query.select_as_cta = False
    query.database.allow_run_async = True

    # Mock get_query to return our mocked query object
    mocker.patch("superset.sql_lab.get_query", return_value=query)

    # Mock sys.getsizeof to simulate a large payload size. The size is written in
    # 1024 * 1024 byte units so that "100 MB" matches how the limit is computed.
    mocker.patch("sys.getsizeof", return_value=100 * 1024 * 1024)  # 100 MB

    # Mock _serialize_payload so no real serialization happens; the reported size
    # comes from the patched sys.getsizeof, not from this placeholder value.
    mocker.patch(
        "superset.sql_lab._serialize_payload",
        return_value="serialized_payload",
    )

    # Mock db.session.refresh to avoid AttributeError during session refresh
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)

    # Mock the results backend to avoid "Results backend is not configured" error
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # Test that the exception is raised when the payload exceeds the limit
    with pytest.raises(SupersetErrorException):
        execute_sql_statements(
            query_id=1,
            rendered_query="SELECT 42 AS answer",
            return_results=True,  # Results are returned to the caller...
            store_results=True,  # ...and also stored in the results backend
            start_time=None,
            expand_data=False,
            log_params={},
        )
|
||||
|
||||
|
||||
@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Payload limit used by this test, in MB
)
def test_execute_sql_statement_within_payload_limit(mocker: MockerFixture) -> None:
    """
    Test for `execute_sql_statements` when the result payload size is within the limit,
    and check if the flow executes smoothly without raising any exceptions.

    `SQLLAB_PAYLOAD_MAX_MB` is patched to 50 and `sys.getsizeof` is patched to
    report 10 MB, so the size check must not raise.
    """

    # Mock the query object and database
    query = mocker.MagicMock()
    query.limit = 1
    query.database = mocker.MagicMock()
    query.database.db_engine_spec.is_select_query.return_value = True
    query.database.cache_timeout = 100
    query.status = "RUNNING"
    query.select_as_cta = False
    query.database.allow_run_async = True

    # Mock get_query to return our mocked query object
    mocker.patch("superset.sql_lab.get_query", return_value=query)

    # Mock sys.getsizeof to simulate a payload size that is within the limit. The
    # size is written in 1024 * 1024 byte units to match how the limit is computed.
    mocker.patch("sys.getsizeof", return_value=10 * 1024 * 1024)  # 10 MB

    # Mock _serialize_payload so no real serialization happens; the reported size
    # comes from the patched sys.getsizeof, not from this placeholder value.
    mocker.patch(
        "superset.sql_lab._serialize_payload",
        return_value="serialized_payload",
    )

    # Mock db.session.refresh to avoid AttributeError during session refresh
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)

    # Mock the results backend to avoid "Results backend is not configured" error
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # No try/except wrapper: if SupersetErrorException (or anything else) is raised,
    # pytest already fails the test and reports the full traceback.
    execute_sql_statements(
        query_id=1,
        rendered_query="SELECT 42 AS answer",
        return_results=True,  # Results are returned to the caller...
        store_results=True,  # ...and also stored in the results backend
        start_time=None,
        expand_data=False,
        log_params={},
    )
|
||||
|
||||
|
||||
def test_sql_lab_insert_rls_as_subquery(
|
||||
mocker: MockerFixture,
|
||||
session: Session,
|
||||
|
|
|
|||
Loading…
Reference in New Issue