From 001100ddf02db7be1ea94a1cac16475d00436b78 Mon Sep 17 00:00:00 2001 From: Zamar Date: Mon, 9 Jan 2023 07:56:19 +0100 Subject: [PATCH] feat(db_engine_specs): big query cost estimation (#21325) Co-authored-by: zamar roura Co-authored-by: Zamar Roura --- .../src/SqlLab/reducers/sqlLab.js | 6 +- .../database/DatabaseModal/ExtraOptions.tsx | 2 +- superset/config.py | 19 ++-- superset/db_engine_specs/bigquery.py | 92 +++++++++++++++++++ .../translations/de/LC_MESSAGES/messages.json | 2 +- .../translations/de/LC_MESSAGES/messages.po | 2 +- .../translations/en/LC_MESSAGES/messages.po | 2 +- .../translations/es/LC_MESSAGES/messages.po | 2 +- .../translations/fr/LC_MESSAGES/messages.json | 2 +- .../translations/fr/LC_MESSAGES/messages.po | 2 +- .../translations/it/LC_MESSAGES/messages.po | 2 +- .../translations/ja/LC_MESSAGES/messages.po | 2 +- .../translations/ko/LC_MESSAGES/messages.po | 2 +- superset/translations/messages.pot | 2 +- .../translations/nl/LC_MESSAGES/messages.json | 2 +- .../translations/nl/LC_MESSAGES/messages.po | 2 +- .../pt_BR/LC_MESSAGES/messages.po | 2 +- .../translations/ru/LC_MESSAGES/messages.po | 2 +- .../translations/sk/LC_MESSAGES/messages.po | 2 +- .../translations/sl/LC_MESSAGES/messages.json | 15 +-- .../translations/sl/LC_MESSAGES/messages.po | 7 +- .../translations/zh/LC_MESSAGES/messages.po | 2 +- 22 files changed, 128 insertions(+), 45 deletions(-) diff --git a/superset-frontend/src/SqlLab/reducers/sqlLab.js b/superset-frontend/src/SqlLab/reducers/sqlLab.js index 478487d6e..e3bb196fb 100644 --- a/superset-frontend/src/SqlLab/reducers/sqlLab.js +++ b/superset-frontend/src/SqlLab/reducers/sqlLab.js @@ -320,7 +320,7 @@ export default function sqlLabReducer(state = {}, action) { ...state, queryCostEstimates: { ...state.queryCostEstimates, - [action.query.sqlEditorId]: { + [action.query.id]: { completed: false, cost: null, error: null, @@ -333,7 +333,7 @@ export default function sqlLabReducer(state = {}, action) { ...state, queryCostEstimates: { ...state.queryCostEstimates, - [action.query.sqlEditorId]: { + [action.query.id]: { completed: true, cost: action.json, error: null, @@ -346,7 +346,7 @@ export default function sqlLabReducer(state = {}, action) { ...state, queryCostEstimates: { ...state.queryCostEstimates, - [action.query.sqlEditorId]: { + [action.query.id]: { completed: false, cost: null, error: action.error, diff --git a/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx b/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx index c8c03878c..82eeea05a 100644 --- a/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx +++ b/superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx @@ -171,7 +171,7 @@ const ExtraOptions = ({ /> diff --git a/superset/config.py b/superset/config.py index 5cdf1c7cb..d8ff88978 100644 --- a/superset/config.py +++ b/superset/config.py @@ -478,6 +478,11 @@ DEFAULT_FEATURE_FLAGS: Dict[str, bool] = { "DRILL_TO_DETAIL": False, "DATAPANEL_CLOSED_BY_DEFAULT": False, "HORIZONTAL_FILTER_BAR": False, + # The feature is off by default, and currently only supported in Presto and Postgres, + # and Bigquery. + # It also needs to be enabled on a per-database basis, by adding the key/value pair + # `cost_estimate_enabled: true` to the database `extra` attribute. + "ESTIMATE_QUERY_COST": False, # Allow users to enable ssh tunneling when creating a DB. # Users must check whether the DB engine supports SSH Tunnels # otherwise enabling this flag won't have any effect on the DB. @@ -932,16 +937,14 @@ SQLLAB_ASYNC_TIME_LIMIT_SEC = int(timedelta(hours=6).total_seconds()) # query costs before they run. These EXPLAIN queries should have a small # timeout. SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = int(timedelta(seconds=10).total_seconds()) -# The feature is off by default, and currently only supported in Presto and Postgres. -# It also need to be enabled on a per-database basis, by adding the key/value pair -# `cost_estimate_enabled: true` to the database `extra` attribute. -ESTIMATE_QUERY_COST = False + # The cost returned by the databases is a relative value; in order to map the cost to # a tangible value you need to define a custom formatter that takes into consideration # your specific infrastructure. For example, you could analyze queries a posteriori by # running EXPLAIN on them, and compute a histogram of relative costs to present the -# cost as a percentile: -# +# cost as a percentile, this step is optional as every db engine spec has its own +# query cost formatter, but it you wanna customize it you can define it inside the config: + # def postgres_query_cost_formatter( # result: List[Dict[str, Any]] # ) -> List[Dict[str, str]]: @@ -959,9 +962,7 @@ ESTIMATE_QUERY_COST = False # # return out # -# Then on define the formatter on the config: -# -# "QUERY_COST_FORMATTERS_BY_ENGINE": {"postgresql": postgres_query_cost_formatter}, +# QUERY_COST_FORMATTERS_BY_ENGINE: {"postgresql": postgres_query_cost_formatter} QUERY_COST_FORMATTERS_BY_ENGINE: Dict[ str, Callable[[List[Dict[str, Any]]], List[Dict[str, Any]]] ] = {} diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 300f8c25a..52116d487 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -31,6 +31,7 @@ from sqlalchemy.engine.base import Engine from sqlalchemy.sql import sqltypes from typing_extensions import TypedDict +from superset import sql_parse from superset.constants import PASSWORD_MASK from superset.databases.schemas import encrypted_field_properties, EncryptedString from superset.databases.utils import make_url_safe @@ -364,6 +365,97 @@ class BigQueryEngineSpec(BaseEngineSpec): pandas_gbq.to_gbq(df, **to_gbq_kwargs) + @classmethod + def estimate_query_cost( + cls, + database: "Database", + schema: str, + sql: str, + source: Optional[utils.QuerySource] = None, + ) -> List[Dict[str, Any]]: + """ + Estimate the cost of a multiple statement SQL query. + + :param database: Database instance + :param schema: Database schema + :param sql: SQL query with possibly multiple statements + :param source: Source of the query (eg, "sql_lab") + """ + extra = database.get_extra() or {} + if not cls.get_allow_cost_estimate(extra): + raise Exception("Database does not support cost estimation") + + parsed_query = sql_parse.ParsedQuery(sql) + statements = parsed_query.get_statements() + costs = [] + for statement in statements: + processed_statement = cls.process_statement(statement, database) + + costs.append(cls.estimate_statement_cost(processed_statement, database)) + return costs + + @classmethod + def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool: + return True + + @classmethod + def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]: + try: + # pylint: disable=import-outside-toplevel + # It's the only way to perfom a dry-run estimate cost + from google.cloud import bigquery + from google.oauth2 import service_account + except ImportError as ex: + raise Exception( + "Could not import libraries `pygibquery` or `google.oauth2`, which are " + "required to be installed in your environment in order " + "to upload data to BigQuery" + ) from ex + + with cls.get_engine(cursor) as engine: + creds = engine.dialect.credentials_info + + creds = service_account.Credentials.from_service_account_info(creds) + client = bigquery.Client(credentials=creds) + job_config = bigquery.QueryJobConfig(dry_run=True) + + query_job = client.query( + statement, + job_config=job_config, + ) # Make an API request. + + # Format Bytes. + # TODO: Humanize in case more db engine specs need to be added, + # this should be made a function outside this scope. + byte_division = 1024 + if hasattr(query_job, "total_bytes_processed"): + query_bytes_processed = query_job.total_bytes_processed + if query_bytes_processed // byte_division == 0: + byte_type = "B" + total_bytes_processed = query_bytes_processed + elif query_bytes_processed // (byte_division**2) == 0: + byte_type = "KB" + total_bytes_processed = round(query_bytes_processed / byte_division, 2) + elif query_bytes_processed // (byte_division**3) == 0: + byte_type = "MB" + total_bytes_processed = round( + query_bytes_processed / (byte_division**2), 2 + ) + else: + byte_type = "GB" + total_bytes_processed = round( + query_bytes_processed / (byte_division**3), 2 + ) + + return {f"{byte_type} Processed": total_bytes_processed} + return {} + + @classmethod + def query_cost_formatter( + cls, raw_cost: List[Dict[str, Any]] + ) -> List[Dict[str, str]]: + return [{k: str(v) for k, v in row.items()} for row in raw_cost] + @classmethod def build_sqlalchemy_uri( cls, diff --git a/superset/translations/de/LC_MESSAGES/messages.json b/superset/translations/de/LC_MESSAGES/messages.json index 9abf6b6d8..96dab1f54 100644 --- a/superset/translations/de/LC_MESSAGES/messages.json +++ b/superset/translations/de/LC_MESSAGES/messages.json @@ -1963,7 +1963,7 @@ "Font size for the smallest value in the list": [ "Schriftgröße für den kleinsten Wert in der Liste" ], - "For Presto and Postgres, shows a button to compute cost before running a query.": [ + "For Bigquery, Presto and Postgres, shows a button to compute cost before running a query.": [ "Für Presto und Postgres wird ein Buttons angezeigt, um Kosten vor dem Ausführen einer Abfrage zu schätzen." ], "For regular filters, these are the roles this filter will be applied to. For base filters, these are the roles that the filter DOES NOT apply to, e.g. Admin if admin should see all data.": [ diff --git a/superset/translations/de/LC_MESSAGES/messages.po b/superset/translations/de/LC_MESSAGES/messages.po index 8726ceae1..e6d13c552 100644 --- a/superset/translations/de/LC_MESSAGES/messages.po +++ b/superset/translations/de/LC_MESSAGES/messages.po @@ -6114,7 +6114,7 @@ msgstr "Schriftgröße für den kleinsten Wert in der Liste" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" "Für Presto und Postgres wird ein Buttons angezeigt, um Kosten vor dem " diff --git a/superset/translations/en/LC_MESSAGES/messages.po b/superset/translations/en/LC_MESSAGES/messages.po index 4fb9477b0..45381a7bc 100644 --- a/superset/translations/en/LC_MESSAGES/messages.po +++ b/superset/translations/en/LC_MESSAGES/messages.po @@ -5693,7 +5693,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/es/LC_MESSAGES/messages.po b/superset/translations/es/LC_MESSAGES/messages.po index c5af1d4a4..6d1b32b8e 100644 --- a/superset/translations/es/LC_MESSAGES/messages.po +++ b/superset/translations/es/LC_MESSAGES/messages.po @@ -5975,7 +5975,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 #, fuzzy msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "Estimar el costo antes de ejecutar una consulta" diff --git a/superset/translations/fr/LC_MESSAGES/messages.json b/superset/translations/fr/LC_MESSAGES/messages.json index 90d271e24..d0ddca087 100644 --- a/superset/translations/fr/LC_MESSAGES/messages.json +++ b/superset/translations/fr/LC_MESSAGES/messages.json @@ -3419,7 +3419,7 @@ "Enable query cost estimation": [ "Activer l'estimation du coût de la requête" ], - "For Presto and Postgres, shows a button to compute cost before running a query.": [ + "For Bigquery, Presto and Postgres, shows a button to compute cost before running a query.": [ "Pour Presto et Postgres, affiche un bouton pour calculer le coût avant d'exécuter une requête." ], "Allow this database to be explored": [ diff --git a/superset/translations/fr/LC_MESSAGES/messages.po b/superset/translations/fr/LC_MESSAGES/messages.po index 5472d7459..d125ecd3c 100644 --- a/superset/translations/fr/LC_MESSAGES/messages.po +++ b/superset/translations/fr/LC_MESSAGES/messages.po @@ -6118,7 +6118,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" "Pour Presto et Postgres, affiche un bouton pour calculer le coût avant " diff --git a/superset/translations/it/LC_MESSAGES/messages.po b/superset/translations/it/LC_MESSAGES/messages.po index 3873a4b5a..1947ea8d9 100644 --- a/superset/translations/it/LC_MESSAGES/messages.po +++ b/superset/translations/it/LC_MESSAGES/messages.po @@ -5848,7 +5848,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/ja/LC_MESSAGES/messages.po b/superset/translations/ja/LC_MESSAGES/messages.po index d34ad5518..d0f0eae5d 100644 --- a/superset/translations/ja/LC_MESSAGES/messages.po +++ b/superset/translations/ja/LC_MESSAGES/messages.po @@ -5833,7 +5833,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/ko/LC_MESSAGES/messages.po b/superset/translations/ko/LC_MESSAGES/messages.po index 5b4530e03..47aeba023 100644 --- a/superset/translations/ko/LC_MESSAGES/messages.po +++ b/superset/translations/ko/LC_MESSAGES/messages.po @@ -5798,7 +5798,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/messages.pot b/superset/translations/messages.pot index 98c91df9b..28fe2143b 100644 --- a/superset/translations/messages.pot +++ b/superset/translations/messages.pot @@ -5698,7 +5698,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/nl/LC_MESSAGES/messages.json b/superset/translations/nl/LC_MESSAGES/messages.json index 43bbd87a9..9d94c13ef 100644 --- a/superset/translations/nl/LC_MESSAGES/messages.json +++ b/superset/translations/nl/LC_MESSAGES/messages.json @@ -4487,7 +4487,7 @@ "Sta manipulatie van de database toe met niet-SELECT statements zoals UPDATE, DELETE, CREATE, enz." ], "Enable query cost estimation": [""], - "For Presto and Postgres, shows a button to compute cost before running a query.": [ + "For Bigquery, Presto and Postgres, shows a button to compute cost before running a query.": [ "" ], "Allow this database to be explored": [""], diff --git a/superset/translations/nl/LC_MESSAGES/messages.po b/superset/translations/nl/LC_MESSAGES/messages.po index 5db5e5ae4..558d58718 100644 --- a/superset/translations/nl/LC_MESSAGES/messages.po +++ b/superset/translations/nl/LC_MESSAGES/messages.po @@ -15015,7 +15015,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/pt_BR/LC_MESSAGES/messages.po b/superset/translations/pt_BR/LC_MESSAGES/messages.po index 2441b2c76..92a58e643 100644 --- a/superset/translations/pt_BR/LC_MESSAGES/messages.po +++ b/superset/translations/pt_BR/LC_MESSAGES/messages.po @@ -6115,7 +6115,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 #, fuzzy msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "Estima o custo antes de executar uma consulta" diff --git a/superset/translations/ru/LC_MESSAGES/messages.po b/superset/translations/ru/LC_MESSAGES/messages.po index 8cfde5eed..112038870 100644 --- a/superset/translations/ru/LC_MESSAGES/messages.po +++ b/superset/translations/ru/LC_MESSAGES/messages.po @@ -6040,7 +6040,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 #, fuzzy msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "Спрогнозировать время до выполнения запроса" diff --git a/superset/translations/sk/LC_MESSAGES/messages.po b/superset/translations/sk/LC_MESSAGES/messages.po index 6429587f8..a0e2a0c31 100644 --- a/superset/translations/sk/LC_MESSAGES/messages.po +++ b/superset/translations/sk/LC_MESSAGES/messages.po @@ -5710,7 +5710,7 @@ msgstr "" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "" diff --git a/superset/translations/sl/LC_MESSAGES/messages.json b/superset/translations/sl/LC_MESSAGES/messages.json index 1387b5faa..232b978d4 100644 --- a/superset/translations/sl/LC_MESSAGES/messages.json +++ b/superset/translations/sl/LC_MESSAGES/messages.json @@ -3453,17 +3453,8 @@ "Are you sure you want to overwrite this dataset?": [ "Ali ste prepričani, da želite prepisati podatkovni set?" ], - "Undefined": ["Ni definirano"], - "Save": ["Shrani"], - "Save as": ["Shrani kot"], - "Save query": ["Shrani poizvedbo"], - "Update": ["Posodobi"], - "Label for your query": ["Ime vaše poizvedbe"], - "Write a description for your query": ["Dodajte opis vaše poizvedbe"], - "Schedule query": ["Urnik poizvedb"], - "Schedule": ["Urnik"], - "There was an error with your request": [ - "Pri zahtevi je prišlo do napake" + "For Bigquery, Presto and Postgres, shows a button to compute cost before running a query.": [ + "Za Presto in Postgres prikaže gumb za izračun potratnosti pred zagonom poizvedbe." ], "Please save the query to enable sharing": [ "Shranite poizvedbo za deljenje" @@ -5138,7 +5129,7 @@ "Enable query cost estimation": [ "Omogoči ocenjevanje potratnosti poizvedbe" ], - "For Presto and Postgres, shows a button to compute cost before running a query.": [ + "For Bigquery, Presto and Postgres, shows a button to compute cost before running a query.": [ "Za Presto in Postgres prikaže gumb za izračun potratnosti pred zagonom poizvedbe." ], "Allow this database to be explored": [ diff --git a/superset/translations/sl/LC_MESSAGES/messages.po b/superset/translations/sl/LC_MESSAGES/messages.po index 8f655c74d..0f545732a 100644 --- a/superset/translations/sl/LC_MESSAGES/messages.po +++ b/superset/translations/sl/LC_MESSAGES/messages.po @@ -6696,9 +6696,8 @@ msgstr "Ciljno razmerje za razdelke drevesnega grafikona." #: superset-frontend/plugins/legacy-plugin-chart-treemap/src/index.js:31 msgid "" -"Shows the composition of a dataset by segmenting a given rectangle as smaller " -"rectangles with areas proportional to their value or contribution to the whole. " -"Those rectangles may also, in turn, be further segmented hierarchically." +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " +"query." msgstr "" "Prikaže zgradbo podatkovnega seta na podlagi segmentacije danega pravokotnika na " "manjše pravokotnike, pri čemer je ploščina sorazmerna vrednostim oz. deležem. " @@ -15788,7 +15787,7 @@ msgstr "Omogoči ocenjevanje potratnosti poizvedbe" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 msgid "" -"For Presto and Postgres, shows a button to compute cost before running a query." +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a query." msgstr "" "Za Presto in Postgres prikaže gumb za izračun potratnosti pred zagonom poizvedbe." diff --git a/superset/translations/zh/LC_MESSAGES/messages.po b/superset/translations/zh/LC_MESSAGES/messages.po index ea3762b1c..fb4ac8340 100644 --- a/superset/translations/zh/LC_MESSAGES/messages.po +++ b/superset/translations/zh/LC_MESSAGES/messages.po @@ -5924,7 +5924,7 @@ msgstr "列表中最小值的字体大小" #: superset-frontend/src/views/CRUD/data/database/DatabaseModal/ExtraOptions.tsx:179 #, fuzzy msgid "" -"For Presto and Postgres, shows a button to compute cost before running a " +"For Bigquery, Presto and Postgres, shows a button to compute cost before running a " "query." msgstr "在运行查询之前计算执行计划"