fix(SQL Lab): hang when result set size is too big (#30522)

Co-authored-by: aadhikari <aadhikari@apple.com>
Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
anamitraadhikari 2024-10-14 18:03:28 -07:00 committed by GitHub
parent 0e9c0f621a
commit 6ede3271ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 162 additions and 3 deletions

View File

@ -116,7 +116,8 @@
"GENERIC_BACKEND_ERROR",
"INVALID_PAYLOAD_FORMAT_ERROR",
"INVALID_PAYLOAD_SCHEMA_ERROR",
"REPORT_NOTIFICATION_ERROR"
"REPORT_NOTIFICATION_ERROR",
"RESULT_TOO_LARGE_ERROR"
],
"type": "string"
},

View File

@ -227,6 +227,7 @@ export const ErrorTypeEnum = {
ASYNC_WORKERS_ERROR: 'ASYNC_WORKERS_ERROR',
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: 'ADHOC_SUBQUERY_NOT_ALLOWED_ERROR',
INVALID_SQL_ERROR: 'INVALID_SQL_ERROR',
RESULT_TOO_LARGE_ERROR: 'RESULT_TOO_LARGE_ERROR',
// Generic errors
GENERIC_COMMAND_ERROR: 'GENERIC_COMMAND_ERROR',

View File

@ -159,5 +159,9 @@ export default function setupErrorMessages() {
ErrorTypeEnum.INVALID_SQL_ERROR,
InvalidSQLErrorMessage,
);
errorMessageComponentRegistry.registerValue(
ErrorTypeEnum.RESULT_TOO_LARGE_ERROR,
DatabaseErrorMessage,
);
setupErrorMessagesExtra();
}

View File

@ -960,6 +960,9 @@ SUPERSET_META_DB_LIMIT: int | None = 1000
SQLLAB_SAVE_WARNING_MESSAGE = None
SQLLAB_SCHEDULE_WARNING_MESSAGE = None
# Max payload size (MB) for SQL Lab to prevent browser hangs with large results.
SQLLAB_PAYLOAD_MAX_MB = None
# Force refresh while auto-refresh in dashboard
DASHBOARD_AUTO_REFRESH_MODE: Literal["fetch", "force"] = "force"
# Dashboard auto refresh intervals

View File

@ -87,6 +87,7 @@ class SupersetErrorType(StrEnum):
ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"
INVALID_SQL_ERROR = "INVALID_SQL_ERROR"
RESULT_TOO_LARGE_ERROR = "RESULT_TOO_LARGE_ERROR"
# Generic errors
GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
@ -151,6 +152,7 @@ ISSUE_CODES = {
1036: _("The database was deleted."),
1037: _("Custom SQL fields cannot contain sub-queries."),
1040: _("The submitted payload failed validation."),
1041: _("The result size exceeds the allowed limit."),
}
@ -190,6 +192,7 @@ ERROR_TYPES_TO_ISSUE_CODES_MAPPING = {
SupersetErrorType.DATABASE_NOT_FOUND_ERROR: [1011, 1036],
SupersetErrorType.CONNECTION_DATABASE_TIMEOUT: [1001, 1009],
SupersetErrorType.MARSHMALLOW_ERROR: [1040],
SupersetErrorType.RESULT_TOO_LARGE_ERROR: [1041],
}

View File

@ -17,6 +17,7 @@
# pylint: disable=consider-using-transaction
import dataclasses
import logging
import sys
import uuid
from contextlib import closing
from datetime import datetime
@ -78,6 +79,7 @@ SQL_MAX_ROW = config["SQL_MAX_ROW"]
SQLLAB_CTAS_NO_LIMIT = config["SQLLAB_CTAS_NO_LIMIT"]
log_query = config["QUERY_LOGGER"]
logger = logging.getLogger(__name__)
BYTES_IN_MB = 1024 * 1024
class SqlLabException(Exception):
@ -531,6 +533,7 @@ def execute_sql_statements(
log_params,
apply_ctas,
)
except SqlLabQueryStoppedException:
payload.update({"status": QueryStatus.STOPPED})
return payload
@ -601,6 +604,22 @@ def execute_sql_statements(
serialized_payload = _serialize_payload(
payload, cast(bool, results_backend_use_msgpack)
)
# Check the size of the serialized payload
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
serialized_payload_size = sys.getsizeof(serialized_payload)
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
if serialized_payload_size > max_bytes:
logger.info("Result size exceeds the allowed limit.")
raise SupersetErrorException(
SupersetError(
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
level=ErrorLevel.ERROR,
)
)
cache_timeout = database.cache_timeout
if cache_timeout is None:
cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]
@ -635,6 +654,23 @@ def execute_sql_statements(
"expanded_columns": expanded_columns,
}
)
# Check the size of the serialized payload (opt-in logic for return_results)
if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
serialized_payload = _serialize_payload(
payload, cast(bool, results_backend_use_msgpack)
)
serialized_payload_size = sys.getsizeof(serialized_payload)
max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
if serialized_payload_size > max_bytes:
logger.info("Result size exceeds the allowed limit.")
raise SupersetErrorException(
SupersetError(
message=f"Result size ({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
level=ErrorLevel.ERROR,
)
)
return payload
return None

View File

@ -17,8 +17,10 @@
# pylint: disable=import-outside-toplevel, invalid-name, unused-argument, too-many-locals
import json
from unittest import mock
from uuid import UUID
import pytest
import sqlparse
from freezegun import freeze_time
from pytest_mock import MockerFixture
@ -27,9 +29,9 @@ from sqlalchemy.orm.session import Session
from superset import db
from superset.common.db_query_status import QueryStatus
from superset.errors import ErrorLevel, SupersetErrorType
from superset.exceptions import OAuth2Error
from superset.exceptions import OAuth2Error, SupersetErrorException
from superset.models.core import Database
from superset.sql_lab import get_sql_results
from superset.sql_lab import execute_sql_statements, get_sql_results
from superset.utils.core import override_user
from tests.unit_tests.models.core_test import oauth2_client_info
@ -125,6 +127,115 @@ def test_execute_sql_statement_with_rls(
SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec)
@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # cap SQL Lab payloads at 50 MB for this test
)
def test_execute_sql_statement_exceeds_payload_limit(mocker: MockerFixture) -> None:
    """
    `execute_sql_statements` must raise ``SupersetErrorException`` when the
    serialized result payload is larger than ``SQLLAB_PAYLOAD_MAX_MB``.
    """
    # Build a fake query whose database pretends to run an async SELECT.
    mock_query = mocker.MagicMock()
    mock_query.limit = 1
    mock_query.status = "RUNNING"
    mock_query.select_as_cta = False
    mock_query.database = mocker.MagicMock()
    mock_query.database.cache_timeout = 100
    mock_query.database.allow_run_async = True
    mock_query.database.db_engine_spec.is_select_query.return_value = True
    mocker.patch("superset.sql_lab.get_query", return_value=mock_query)

    # Pretend the serialized payload weighs 100000000 bytes (~95 MB) — well
    # above the 50 MB cap configured via the patched config above.
    mocker.patch("sys.getsizeof", return_value=100000000)
    mocker.patch(
        "superset.sql_lab._serialize_payload",
        side_effect=lambda payload, use_msgpack: "serialized_payload",
    )

    # Neutralize session refresh and the results backend so the code path
    # reaches the payload-size check without touching real infrastructure.
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # The oversized payload must abort the request with a Superset error.
    with pytest.raises(SupersetErrorException):
        execute_sql_statements(
            query_id=1,
            rendered_query="SELECT 42 AS answer",
            return_results=True,  # results returned inline to the caller
            store_results=True,  # results also pushed to the results backend
            start_time=None,
            expand_data=False,
            log_params={},
        )
@mock.patch.dict(
    "superset.sql_lab.config",
    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # cap SQL Lab payloads at 50 MB for this test
)
def test_execute_sql_statement_within_payload_limit(mocker: MockerFixture) -> None:
    """
    `execute_sql_statements` must complete without raising when the serialized
    result payload fits inside ``SQLLAB_PAYLOAD_MAX_MB``.
    """
    # Build a fake query whose database pretends to run an async SELECT.
    mock_query = mocker.MagicMock()
    mock_query.limit = 1
    mock_query.status = "RUNNING"
    mock_query.select_as_cta = False
    mock_query.database = mocker.MagicMock()
    mock_query.database.cache_timeout = 100
    mock_query.database.allow_run_async = True
    mock_query.database.db_engine_spec.is_select_query.return_value = True
    mocker.patch("superset.sql_lab.get_query", return_value=mock_query)

    # Pretend the serialized payload weighs 10000000 bytes (~9.5 MB) — safely
    # under the 50 MB cap configured via the patched config above.
    mocker.patch("sys.getsizeof", return_value=10000000)
    mocker.patch(
        "superset.sql_lab._serialize_payload",
        side_effect=lambda payload, use_msgpack: "serialized_payload",
    )

    # Neutralize session refresh and the results backend so the code path
    # reaches the payload-size check without touching real infrastructure.
    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
    mocker.patch("superset.sql_lab.results_backend", return_value=True)

    # A payload under the cap must flow through without a Superset error.
    try:
        execute_sql_statements(
            query_id=1,
            rendered_query="SELECT 42 AS answer",
            return_results=True,  # results returned inline to the caller
            store_results=True,  # results also pushed to the results backend
            start_time=None,
            expand_data=False,
            log_params={},
        )
    except SupersetErrorException:
        pytest.fail(
            "SupersetErrorException should not have been raised for payload within the limit"
        )
def test_sql_lab_insert_rls_as_subquery(
mocker: MockerFixture,
session: Session,