Pass full response to `query_cost_formatter` (#8652)

* Return full info when doing query cost estimation

* Add unit test

* Fix isort
This commit is contained in:
Beto Dealmeida 2019-11-26 11:49:19 -08:00 committed by GitHub
parent badcf820c9
commit 5f4e3adfd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 70 additions and 6 deletions

View File

@ -445,7 +445,7 @@ class PrestoEngineSpec(BaseEngineSpec):
@classmethod
def estimate_statement_cost( # pylint: disable=too-many-locals
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, float]:
) -> Dict[str, Any]:
"""
Run a SQL query that estimates the cost of a given statement.
@ -453,7 +453,7 @@ class PrestoEngineSpec(BaseEngineSpec):
:param database: Database instance
:param cursor: Cursor instance
:param username: Effective username
:return: JSON estimate from Presto
:return: JSON response from Presto
"""
parsed_query = ParsedQuery(statement)
sql = parsed_query.stripped()
@ -479,11 +479,11 @@ class PrestoEngineSpec(BaseEngineSpec):
# }
# }
result = json.loads(cursor.fetchone()[0])
return result["estimate"]
return result
@classmethod
def query_cost_formatter(
cls, raw_cost: List[Dict[str, float]]
cls, raw_cost: List[Dict[str, Any]]
) -> List[Dict[str, str]]:
"""
Format cost estimate.
@ -516,10 +516,11 @@ class PrestoEngineSpec(BaseEngineSpec):
("networkCost", "Network cost", ""),
]
for row in raw_cost:
estimate: Dict[str, float] = row.get("estimate", {})
statement_cost = {}
for key, label, suffix in columns:
if key in row:
statement_cost[label] = humanize(row[key], suffix).strip()
if key in estimate:
statement_cost[label] = humanize(estimate[key], suffix).strip()
cost.append(statement_cost)
return cost

View File

@ -372,3 +372,66 @@ class PrestoTests(DbEngineSpecTestCase):
PrestoEngineSpec.convert_dttm("TIMESTAMP", dttm),
"from_iso8601_timestamp('2019-01-02T03:04:05.678900')",
)
def test_query_cost_formatter(self):
    """
    Check that ``PrestoEngineSpec.query_cost_formatter`` humanizes the
    top-level ``estimate`` of a full cost response.

    The payload carries two ``estimate`` objects: one nested inside the
    ``inputTableColumnInfos`` entry (``networkCost`` of 0.0) and one at the
    top level (``networkCost`` of about 3.54e11). The expected
    "Network cost" of "354 G" can only match the top-level one.
    """
    # Single-day partition constraint on the ``ds`` column.
    ds_constraint = {
        "columnName": "ds",
        "typeSignature": "varchar",
        "domain": {
            "nullsAllowed": False,
            "ranges": [
                {
                    "low": {"value": "2019-07-10", "bound": "EXACTLY"},
                    "high": {"value": "2019-07-10", "bound": "EXACTLY"},
                }
            ],
        },
    }

    # Per-table info, including its own nested estimate.
    table_info = {
        "table": {
            "catalog": "hive",
            "schemaTable": {
                "schema": "default",
                "table": "fact_passenger_state",
            },
        },
        "columnConstraints": [ds_constraint],
        "estimate": {
            "outputRowCount": 9.04969899e8,
            "outputSizeInBytes": 3.54143678301e11,
            "cpuCost": 3.54143678301e11,
            "maxMemory": 0.0,
            "networkCost": 0.0,
        },
    }

    # Statement-level estimate; differs from the nested one in networkCost.
    statement_estimate = {
        "outputRowCount": 9.04969899e8,
        "outputSizeInBytes": 3.54143678301e11,
        "cpuCost": 3.54143678301e11,
        "maxMemory": 0.0,
        "networkCost": 3.54143678301e11,
    }

    full_response = [
        {
            "inputTableColumnInfos": [table_info],
            "estimate": statement_estimate,
        }
    ]

    humanized = PrestoEngineSpec.query_cost_formatter(full_response)

    self.assertEqual(
        humanized,
        [
            {
                "Output count": "904 M rows",
                "Output size": "354 GB",
                "CPU cost": "354 G",
                "Max memory": "0 B",
                "Network cost": "354 G",
            }
        ],
    )