* added estimate_statement_cost to trino * file formatted Co-authored-by: rijojoseph01 <rijo.joseph@myntra.com>
This commit is contained in:
parent
5181a74116
commit
a2d69ea252
|
|
@ -15,9 +15,10 @@
|
|||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib import parse
|
||||
|
||||
import simplejson as json
|
||||
from sqlalchemy.engine.url import make_url, URL
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
|
@ -103,3 +104,83 @@ class TrinoEngineSpec(BaseEngineSpec):
|
|||
:param username: Effective username
|
||||
"""
|
||||
# Do nothing and let update_impersonation_config take care of impersonation
|
||||
|
||||
@classmethod
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
    """
    Report whether query cost estimation is available for this engine.

    :param extra: Extra configuration dict; it is not consulted here
    :return: Always ``True``
    """
    return True
|
||||
|
||||
@classmethod
def estimate_statement_cost(
    cls, statement: str, cursor: Any
) -> Dict[str, Any]:
    """
    Run a SQL query that estimates the cost of a given statement.

    :param statement: A single SQL statement
    :param cursor: Cursor instance used to run the ``EXPLAIN`` query
    :return: JSON response from Trino decoded into a dict, or an empty
        dict when the cursor yields no row
    """
    sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {statement}"
    cursor.execute(sql)

    # the output from Trino is a single column and a single row containing
    # JSON:
    #
    #   {
    #     ...
    #     "estimate" : {
    #       "outputRowCount" : 8.73265878E8,
    #       "outputSizeInBytes" : 3.41425774958E11,
    #       "cpuCost" : 3.41425774958E11,
    #       "maxMemory" : 0.0,
    #       "networkCost" : 3.41425774958E11
    #     }
    #   }
    row = cursor.fetchone()
    if not row:
        # Nothing came back: return an empty estimate rather than failing
        # with an opaque "NoneType is not subscriptable" error.
        return {}
    return json.loads(row[0])
|
||||
|
||||
@classmethod
def query_cost_formatter(
    cls, raw_cost: List[Dict[str, Any]]
) -> List[Dict[str, str]]:
    """
    Format cost estimate.

    Each entry of ``raw_cost`` is expected to carry an ``"estimate"`` dict;
    the known keys found in it are converted to short, human readable
    strings such as ``"873 M rows"`` or ``"341 GB"``. Entries without an
    estimate yield an empty dict.

    :param raw_cost: JSON estimate from Trino
    :return: Human readable cost estimate
    """

    def humanize(value: Any, suffix: str) -> str:
        """Scale ``value`` down by powers of 1000 and attach a unit."""
        try:
            value = int(value)
        except (TypeError, ValueError, OverflowError):
            # NaN raises ValueError, +/-Infinity raises OverflowError and
            # None raises TypeError; show the raw value instead of failing.
            return str(value)

        prefix = ""
        for candidate in ("K", "M", "G", "T", "P", "E", "Z", "Y"):
            # ">=" semantics so that exactly 1000 is rendered as "1 K".
            if value < 1000:
                break
            prefix = candidate
            value //= 1000

        # Strip the unit first so a suffix with a leading space (" rows")
        # does not produce a double space when no prefix applies.
        unit = f"{prefix}{suffix}".strip()
        return f"{value} {unit}".strip()

    columns = [
        ("outputRowCount", "Output count", " rows"),
        ("outputSizeInBytes", "Output size", "B"),
        ("cpuCost", "CPU cost", ""),
        ("maxMemory", "Max memory", "B"),
        ("networkCost", "Network cost", ""),
    ]

    cost = []
    for row in raw_cost:
        estimate: Dict[str, float] = row.get("estimate", {})
        cost.append(
            {
                label: humanize(estimate[key], suffix)
                for key, label, suffix in columns
                if key in estimate
            }
        )

    return cost
|
||||
|
|
|
|||
Loading…
Reference in New Issue