feat(trino,presto): add missing time grains (#30926)

This commit is contained in:
Ville Brofeldt 2024-11-15 12:38:28 -08:00 committed by GitHub
parent 824eaf84be
commit e528cb48c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 135 additions and 2 deletions

View File

@ -256,8 +256,15 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta):
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "date_trunc('second', CAST({col} AS TIMESTAMP))",
TimeGrain.FIVE_SECONDS: "date_trunc('second', CAST({col} AS TIMESTAMP)) - interval '1' second * (second(CAST({col} AS TIMESTAMP)) % 5)",
TimeGrain.THIRTY_SECONDS: "date_trunc('second', CAST({col} AS TIMESTAMP)) - interval '1' second * (second(CAST({col} AS TIMESTAMP)) % 30)",
TimeGrain.MINUTE: "date_trunc('minute', CAST({col} AS TIMESTAMP))",
TimeGrain.FIVE_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 5)",
TimeGrain.TEN_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 10)",
TimeGrain.FIFTEEN_MINUTES: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 15)",
TimeGrain.HALF_HOUR: "date_trunc('minute', CAST({col} AS TIMESTAMP)) - interval '1' minute * (minute(CAST({col} AS TIMESTAMP)) % 30)",
TimeGrain.HOUR: "date_trunc('hour', CAST({col} AS TIMESTAMP))",
TimeGrain.SIX_HOURS: "date_trunc('hour', CAST({col} AS TIMESTAMP)) - interval '1' hour * (hour(CAST({col} AS TIMESTAMP)) % 6)",
TimeGrain.DAY: "date_trunc('day', CAST({col} AS TIMESTAMP))",
TimeGrain.WEEK: "date_trunc('week', CAST({col} AS TIMESTAMP))",
TimeGrain.MONTH: "date_trunc('month', CAST({col} AS TIMESTAMP))",

View File

@ -21,7 +21,7 @@ from unittest import mock
import pytest
import pytz
from pyhive.sqlalchemy_presto import PrestoDialect
from sqlalchemy import sql, text, types
from sqlalchemy import column, sql, text, types
from sqlalchemy.engine.url import make_url
from superset.sql_parse import Table
@ -240,3 +240,66 @@ def test_get_default_catalog() -> None:
sqlalchemy_uri="presto://localhost:8080/hive/default",
)
assert PrestoEngineSpec.get_default_catalog(database) == "hive"
@pytest.mark.parametrize(
"time_grain,expected_result",
[
("PT1S", "date_trunc('second', CAST(col AS TIMESTAMP))"),
(
"PT5S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT30S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1M", "date_trunc('minute', CAST(col AS TIMESTAMP))"),
(
"PT5M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT10M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 10)",
),
(
"PT15M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 15)",
),
(
"PT0.5H",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1H", "date_trunc('hour', CAST(col AS TIMESTAMP))"),
(
"PT6H",
"date_trunc('hour', CAST(col AS TIMESTAMP)) - interval '1' hour * (hour(CAST(col AS TIMESTAMP)) % 6)",
),
("P1D", "date_trunc('day', CAST(col AS TIMESTAMP))"),
("P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
("P1M", "date_trunc('month', CAST(col AS TIMESTAMP))"),
("P3M", "date_trunc('quarter', CAST(col AS TIMESTAMP))"),
("P1Y", "date_trunc('year', CAST(col AS TIMESTAMP))"),
(
"1969-12-28T00:00:00Z/P1W",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) - interval '1' day",
),
("1969-12-29T00:00:00Z/P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
(
"P1W/1970-01-03T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) + interval '5' day",
),
(
"P1W/1970-01-04T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP)) + interval '6' day",
),
],
)
def test_timegrain_expressions(time_grain: str, expected_result: str) -> None:
from superset.db_engine_specs.presto import PrestoEngineSpec as spec
actual = str(
spec.get_timestamp_expr(col=column("col"), pdf=None, time_grain=time_grain)
)
assert actual == expected_result

View File

@ -28,7 +28,7 @@ import pytest
from flask import g, has_app_context
from pytest_mock import MockerFixture
from requests.exceptions import ConnectionError as RequestsConnectionError
from sqlalchemy import sql, text, types
from sqlalchemy import column, sql, text, types
from sqlalchemy.dialects import sqlite
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import NoSuchTableError
@ -847,3 +847,66 @@ def test_get_oauth2_token(
},
timeout=30.0,
)
@pytest.mark.parametrize(
"time_grain,expected_result",
[
("PT1S", "date_trunc('second', CAST(col AS TIMESTAMP))"),
(
"PT5S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT30S",
"date_trunc('second', CAST(col AS TIMESTAMP)) - interval '1' second * (second(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1M", "date_trunc('minute', CAST(col AS TIMESTAMP))"),
(
"PT5M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 5)",
),
(
"PT10M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 10)",
),
(
"PT15M",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 15)",
),
(
"PT0.5H",
"date_trunc('minute', CAST(col AS TIMESTAMP)) - interval '1' minute * (minute(CAST(col AS TIMESTAMP)) % 30)",
),
("PT1H", "date_trunc('hour', CAST(col AS TIMESTAMP))"),
(
"PT6H",
"date_trunc('hour', CAST(col AS TIMESTAMP)) - interval '1' hour * (hour(CAST(col AS TIMESTAMP)) % 6)",
),
("P1D", "date_trunc('day', CAST(col AS TIMESTAMP))"),
("P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
("P1M", "date_trunc('month', CAST(col AS TIMESTAMP))"),
("P3M", "date_trunc('quarter', CAST(col AS TIMESTAMP))"),
("P1Y", "date_trunc('year', CAST(col AS TIMESTAMP))"),
(
"1969-12-28T00:00:00Z/P1W",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) - interval '1' day",
),
("1969-12-29T00:00:00Z/P1W", "date_trunc('week', CAST(col AS TIMESTAMP))"),
(
"P1W/1970-01-03T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP) + interval '1' day) + interval '5' day",
),
(
"P1W/1970-01-04T00:00:00Z",
"date_trunc('week', CAST(col AS TIMESTAMP)) + interval '6' day",
),
],
)
def test_timegrain_expressions(time_grain: str, expected_result: str) -> None:
from superset.db_engine_specs.trino import TrinoEngineSpec as spec
actual = str(
spec.get_timestamp_expr(col=column("col"), pdf=None, time_grain=time_grain)
)
assert actual == expected_result