feat(trino): add support for query cost estimate #15166 (#15177)

* added estimate_statement_cost to trino

* file formatted

Co-authored-by: rijojoseph01 <rijo.joseph@myntra.com>
This commit is contained in:
rijojoseph07 2021-06-30 15:35:22 +05:30 committed by GitHub
parent 5181a74116
commit a2d69ea252
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 82 additions and 1 deletions

View File

@ -15,9 +15,10 @@
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
from urllib import parse
import simplejson as json
from sqlalchemy.engine.url import make_url, URL
from superset.db_engine_specs.base import BaseEngineSpec
@ -103,3 +104,83 @@ class TrinoEngineSpec(BaseEngineSpec):
:param username: Effective username
"""
# Do nothing and let update_impersonation_config take care of impersonation
@classmethod
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
    """
    Tell whether query cost estimation is allowed.

    :param extra: Extra configuration dictionary; it is not inspected here
    :return: Always ``True``
    """
    cost_estimate_allowed = True
    return cost_estimate_allowed
@classmethod
def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
    """
    Run a SQL query that estimates the cost of a given statement.

    The statement is wrapped in ``EXPLAIN (TYPE IO, FORMAT JSON)`` and
    executed on the given cursor; the JSON document found in the first
    column of the first returned row is decoded and returned.

    :param statement: A single SQL statement
    :param cursor: Cursor instance
    :return: JSON response from Trino, decoded into a dictionary
    :raises ValueError: If the EXPLAIN query returns no row, or if the
        returned payload is not valid JSON
    """
    sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {statement}"
    cursor.execute(sql)

    # the output from Trino is a single column and a single row containing
    # JSON:
    #
    #   {
    #     ...
    #     "estimate" : {
    #       "outputRowCount" : 8.73265878E8,
    #       "outputSizeInBytes" : 3.41425774958E11,
    #       "cpuCost" : 3.41425774958E11,
    #       "maxMemory" : 0.0,
    #       "networkCost" : 3.41425774958E11
    #     }
    #   }
    row = cursor.fetchone()
    if not row:
        # Fail with an explicit message instead of an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError("Trino returned no result for the cost estimate query")
    return json.loads(row[0])
@classmethod
def query_cost_formatter(
    cls, raw_cost: List[Dict[str, Any]]
) -> List[Dict[str, str]]:
    """
    Format cost estimate.

    Each entry of ``raw_cost`` is expected to carry an ``"estimate"``
    mapping; the known keys found in it are rendered with SI prefixes
    (K, M, G, ...) using integer division by 1000, so values are
    truncated rather than rounded. Entries without an estimate yield an
    empty dictionary.

    :param raw_cost: JSON estimate from Trino
    :return: Human readable cost estimate
    """
    si_prefixes = ("K", "M", "G", "T", "P", "E", "Z", "Y")
    step = 1000

    def humanize(value: Any, suffix: str) -> str:
        """Render ``value`` as an integer with an SI prefix and ``suffix``."""
        try:
            value = int(value)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric payloads (e.g. "NaN", None, infinity) are shown
            # verbatim instead of aborting the whole estimate.
            return str(value)

        prefix = ""
        for candidate in si_prefixes:
            # ``>=`` so that exactly 1000 becomes "1 K", consistent with 1001.
            if value < step:
                break
            prefix = candidate
            value //= step

        # Collapse whitespace so an empty prefix combined with a suffix
        # that starts with a space (" rows") does not yield "5  rows".
        return " ".join(f"{value} {prefix}{suffix}".split())

    columns = [
        ("outputRowCount", "Output count", " rows"),
        ("outputSizeInBytes", "Output size", "B"),
        ("cpuCost", "CPU cost", ""),
        ("maxMemory", "Max memory", "B"),
        ("networkCost", "Network cost", ""),
    ]

    cost = []
    for row in raw_cost:
        # ``or {}`` also covers an explicit ``"estimate": null``.
        estimate: Dict[str, Any] = row.get("estimate") or {}
        cost.append(
            {
                label: humanize(estimate[key], suffix)
                for key, label, suffix in columns
                if key in estimate
            }
        )
    return cost