From d6a82f7852176b10cf82e83b9d8729cc87edae39 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 10 Dec 2024 13:09:39 -0500 Subject: [PATCH] feat: fine-grain chart data telemetry (#31273) --- superset/charts/data/api.py | 24 +++++----- superset/charts/post_processing.py | 18 +++++--- superset/models/core.py | 44 ++++++++++++++----- .../reports/commands_tests.py | 2 +- 4 files changed, 59 insertions(+), 29 deletions(-) diff --git a/superset/charts/data/api.py b/superset/charts/data/api.py index 653b09896..0f6e605c7 100644 --- a/superset/charts/data/api.py +++ b/superset/charts/data/api.py @@ -399,17 +399,19 @@ class ChartDataRestApi(ChartRestApi): for query in queries: with contextlib.suppress(KeyError): del query["query"] - response_data = json.dumps( - {"result": queries}, - default=json.json_int_dttm_ser, - ignore_nan=True, - ) + with event_logger.log_context(f"{self.__class__.__name__}.json_dumps"): + response_data = json.dumps( + {"result": queries}, + default=json.json_int_dttm_ser, + ignore_nan=True, + ) resp = make_response(response_data, 200) resp.headers["Content-Type"] = "application/json; charset=utf-8" return resp return self.response_400(message=f"Unsupported result_format: {result_format}") + @event_logger.log_this def _get_data_response( self, command: ChartDataCommand, @@ -435,11 +437,13 @@ class ChartDataRestApi(ChartRestApi): ) -> dict[str, Any]: return { "dashboard_id": form_data.get("form_data", {}).get("dashboardId"), - "dataset_id": form_data.get("datasource", {}).get("id") - if isinstance(form_data.get("datasource"), dict) - and form_data.get("datasource", {}).get("type") - == DatasourceType.TABLE.value - else None, + "dataset_id": ( + form_data.get("datasource", {}).get("id") + if isinstance(form_data.get("datasource"), dict) + and form_data.get("datasource", {}).get("type") + == DatasourceType.TABLE.value + else None + ), "slice_id": form_data.get("form_data", {}).get("slice_id"), } diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 4c5abd8db..edc3c9447 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -34,6 +34,7 @@ import pandas as pd from flask_babel import gettext as __ from superset.common.chart_data import ChartDataResultFormat +from superset.extensions import event_logger from superset.utils.core import ( extract_dataframe_dtypes, get_column_names, @@ -296,6 +297,7 @@ post_processors = { } +@event_logger.log_this def apply_post_process( result: dict[Any, Any], form_data: Optional[dict[str, Any]] = None, @@ -344,15 +346,19 @@ def apply_post_process( # `Tuple[str]`. Otherwise encoding to JSON later will fail because # maps cannot have tuples as their keys in JSON. processed_df.columns = [ - " ".join(str(name) for name in column).strip() - if isinstance(column, tuple) - else column + ( + " ".join(str(name) for name in column).strip() + if isinstance(column, tuple) + else column + ) for column in processed_df.columns ] processed_df.index = [ - " ".join(str(name) for name in index).strip() - if isinstance(index, tuple) - else index + ( + " ".join(str(name) for name in index).strip() + if isinstance(index, tuple) + else index + ) for index in processed_df.index ] diff --git a/superset/models/core.py b/superset/models/core.py index 9b06932af..4ffbc6aaa 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -75,7 +75,11 @@ from superset.extensions import ( from superset.models.helpers import AuditMixinNullable, ImportExportMixin from superset.result_set import SupersetResultSet from superset.sql_parse import Table -from superset.superset_typing import OAuth2ClientConfig, ResultSetColumnType +from superset.superset_typing import ( + DbapiDescription, + OAuth2ClientConfig, + ResultSetColumnType, +) from superset.utils import cache as cache_util, core as utils, json from superset.utils.backports import StrEnum from superset.utils.core import DatasourceName, get_username @@ -667,7 +671,7 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): # pylint: disable ) return sql_ - def get_df( # pylint: disable=too-many-locals + def get_df( self, sql: str, catalog: str | None = None, @@ -700,21 +704,37 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): # pylint: disable object_ref=__name__, ): self.db_engine_spec.execute(cursor, sql_, self) - if i < len(sqls) - 1: - # If it's not the last, we don't keep the results - cursor.fetchall() - else: - # Last query, fetch and process the results - data = self.db_engine_spec.fetch_data(cursor) - result_set = SupersetResultSet( - data, cursor.description, self.db_engine_spec - ) - df = result_set.to_pandas_df() + + rows = self.fetch_rows(cursor, i == len(sqls) - 1) + if rows is not None: + df = self.load_into_dataframe(cursor.description, rows) + if mutator: df = mutator(df) return self.post_process_df(df) + @event_logger.log_this + def fetch_rows(self, cursor: Any, last: bool) -> list[tuple[Any, ...]] | None: + if not last: + cursor.fetchall() + return None + + return self.db_engine_spec.fetch_data(cursor) + + @event_logger.log_this + def load_into_dataframe( + self, + description: DbapiDescription, + data: list[tuple[Any, ...]], + ) -> pd.DataFrame: + result_set = SupersetResultSet( + data, + description, + self.db_engine_spec, + ) + return result_set.to_pandas_df() + def compile_sqla_query( self, qry: Select, diff --git a/tests/integration_tests/reports/commands_tests.py b/tests/integration_tests/reports/commands_tests.py index f18d454bb..d52301963 100644 --- a/tests/integration_tests/reports/commands_tests.py +++ b/tests/integration_tests/reports/commands_tests.py @@ -1710,7 +1710,7 @@ def test_alert_limit_is_applied( with patch.object( create_alert_email_chart.database.db_engine_spec, "fetch_data", - return_value=None, + return_value=[], ): # noqa: F841 AsyncExecuteReportScheduleCommand( TEST_ID, create_alert_email_chart.id, datetime.utcnow()