feat: improve SQLite DB engine spec (#24909)

This commit is contained in:
Beto Dealmeida 2023-08-07 15:59:56 -07:00 committed by GitHub
parent 3b81538e02
commit 85a7d5cb3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 383 additions and 37 deletions

View File

@@ -32,7 +32,7 @@ from typing_extensions import TypedDict
from superset import security_manager
from superset.constants import PASSWORD_MASK
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.db_engine_specs.sqlite import SqliteEngineSpec
from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
if TYPE_CHECKING:
@@ -65,14 +65,13 @@ class GSheetsPropertiesType(TypedDict):
catalog: dict[str, str]
class GSheetsEngineSpec(SqliteEngineSpec):
class GSheetsEngineSpec(ShillelaghEngineSpec):
"""Engine for Google spreadsheets"""
engine = "gsheets"
engine_name = "Google Sheets"
engine = "gsheets"
allows_joins = True
allows_subqueries = True
disable_ssh_tunneling = True
parameters_schema = GSheetsParametersSchema()
default_driver = "apsw"

View File

@@ -14,8 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from typing import TYPE_CHECKING
from superset.db_engine_specs.sqlite import SqliteEngineSpec
if TYPE_CHECKING:
from superset.models.core import Database
class ShillelaghEngineSpec(SqliteEngineSpec):
"""Engine for shillelagh"""
@@ -28,3 +35,14 @@ class ShillelaghEngineSpec(SqliteEngineSpec):
allows_joins = True
allows_subqueries = True
@classmethod
def get_function_names(
cls,
database: Database,
) -> list[str]:
"""
Return the SQL function names available through Shillelagh.

Extends the SQLite function list from the parent class with three extra
entries ("sleep", "version", "get_metadata") — presumably registered by
the Shillelagh adapter layer rather than SQLite itself; TODO confirm
against the Shillelagh documentation.
"""
return super().get_function_names(database) + [
"sleep",
"version",
"get_metadata",
]

View File

@@ -14,10 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import re
from datetime import datetime
from re import Pattern
from typing import Any, Optional, TYPE_CHECKING
from typing import Any, TYPE_CHECKING
from flask_babel import gettext as __
from sqlalchemy import types
@@ -39,11 +42,41 @@ class SqliteEngineSpec(BaseEngineSpec):
engine = "sqlite"
engine_name = "SQLite"
disable_ssh_tunneling = True
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
TimeGrain.FIVE_SECONDS: (
"DATETIME({col}, printf('-%d seconds', "
"CAST(strftime('%S', {col}) AS INT) % 5))"
),
TimeGrain.THIRTY_SECONDS: (
"DATETIME({col}, printf('-%d seconds', "
"CAST(strftime('%S', {col}) AS INT) % 30))"
),
TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
TimeGrain.FIVE_MINUTES: (
"DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
"CAST(strftime('%M', {col}) AS INT) % 5))"
),
TimeGrain.TEN_MINUTES: (
"DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
"CAST(strftime('%M', {col}) AS INT) % 10))"
),
TimeGrain.FIFTEEN_MINUTES: (
"DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
"CAST(strftime('%M', {col}) AS INT) % 15))"
),
TimeGrain.THIRTY_MINUTES: (
"DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
"CAST(strftime('%M', {col}) AS INT) % 30))"
),
TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
TimeGrain.SIX_HOURS: (
"DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', "
"CAST(strftime('%H', {col}) AS INT) % 6))"
),
TimeGrain.DAY: "DATETIME({col}, 'start of day')",
TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
-strftime('%w', {col}) || ' days')",
@@ -62,6 +95,13 @@ class SqliteEngineSpec(BaseEngineSpec):
"DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
),
}
# NOTE: HALF_HOUR and QUARTER_YEAR are aliases of the THIRTY_MINUTES and QUARTER expressions; it is unclear why both grain names exist
_time_grain_expressions.update(
{
TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER],
}
)
custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
COLUMN_DOES_NOT_EXIST_REGEX: (
@@ -77,8 +117,8 @@ class SqliteEngineSpec(BaseEngineSpec):
@classmethod
def convert_dttm(
cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
) -> Optional[str]:
cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
) -> str | None:
sqla_type = cls.get_sqla_column_type(target_type)
if isinstance(sqla_type, (types.String, types.DateTime)):
return f"""'{dttm.isoformat(sep=" ", timespec="seconds")}'"""
@@ -86,7 +126,134 @@ class SqliteEngineSpec(BaseEngineSpec):
@classmethod
def get_table_names(
cls, database: "Database", inspector: Inspector, schema: Optional[str]
cls,
database: Database,
inspector: Inspector,
schema: str | None,
) -> set[str]:
"""Need to disregard the schema for Sqlite"""
return set(inspector.get_table_names())
@classmethod
def get_function_names(
    cls,
    database: Database,
) -> list[str]:
    """
    Return the names of the functions built into SQLite.

    The list is hard-coded (presumably because SQLite exposes no
    introspection query for registered functions — TODO confirm) and
    covers the core scalar, aggregate, window, JSON, math, and
    date/time functions.

    :param database: unused; kept to satisfy the base-class interface
    :return: list of SQLite function names
    """
    # Immutable module of truth for the known SQLite functions; kept in
    # the same (mostly alphabetical) order the API has always returned.
    known_functions = (
        "abs", "acos", "acosh", "asin", "asinh", "atan", "atan2", "atanh",
        "avg", "ceil", "ceiling", "changes", "char", "coalesce", "cos",
        "cosh", "count", "cume_dist", "date", "datetime", "degrees",
        "dense_rank", "exp", "first_value", "floor", "format", "glob",
        "group_concat", "hex", "ifnull", "iif", "instr", "json",
        "json_array", "json_array_length", "json_each",
        "json_error_position", "json_extract", "json_group_array",
        "json_group_object", "json_insert", "json_object", "json_patch",
        "json_quote", "json_remove", "json_replace", "json_set",
        "json_tree", "json_type", "json_valid", "julianday", "lag",
        "last_insert_rowid", "last_value", "lead", "length", "like",
        "likelihood", "likely", "ln", "load_extension", "log", "log10",
        "log2", "lower", "ltrim", "max", "min", "mod", "nth_value",
        "ntile", "nullif", "percent_rank", "pi", "pow", "power",
        "printf", "quote", "radians", "random", "randomblob", "rank",
        "replace", "round", "row_number", "rtrim", "sign", "sin",
        "sinh", "soundex", "sqlite_compileoption_get",
        "sqlite_compileoption_used", "sqlite_offset", "sqlite_source_id",
        "sqlite_version", "sqrt", "strftime", "substr", "substring",
        "sum", "tan", "tanh", "time", "total_changes", "trim", "trunc",
        "typeof", "unhex", "unicode", "unixepoch", "unlikely", "upper",
        "zeroblob",
    )
    # Return a fresh list so callers may mutate the result freely.
    return list(known_functions)

View File

@@ -2836,7 +2836,7 @@ class TestDatabaseApi(SupersetTestCase):
)
def test_function_names(self, mock_get_function_names):
example_db = get_example_database()
if example_db.backend in {"hive", "presto"}:
if example_db.backend in {"hive", "presto", "sqlite"}:
return
mock_get_function_names.return_value = ["AVG", "MAX", "SUM"]
@@ -2850,6 +2850,136 @@ class TestDatabaseApi(SupersetTestCase):
assert rv.status_code == 200
assert response == {"function_names": ["AVG", "MAX", "SUM"]}
def test_function_names_sqlite(self):
"""
Validate the ``/function_names/`` endpoint against the real SQLite spec.

Unlike ``test_function_names`` (which mocks ``get_function_names``),
this test exercises the actual ``SqliteEngineSpec`` list, so it only
runs when the example database backend is SQLite.
"""
example_db = get_example_database()
# This assertion is SQLite-specific; skip on every other backend.
if example_db.backend != "sqlite":
return
self.login(username="admin")
# NOTE(review): assumes database id 1 is the SQLite example database —
# verify against the test fixtures.
uri = "api/v1/database/1/function_names/"
rv = self.client.get(uri)
response = json.loads(rv.data.decode("utf-8"))
assert rv.status_code == 200
# Must match SqliteEngineSpec.get_function_names exactly, same order.
assert response == {
"function_names": [
"abs",
"acos",
"acosh",
"asin",
"asinh",
"atan",
"atan2",
"atanh",
"avg",
"ceil",
"ceiling",
"changes",
"char",
"coalesce",
"cos",
"cosh",
"count",
"cume_dist",
"date",
"datetime",
"degrees",
"dense_rank",
"exp",
"first_value",
"floor",
"format",
"glob",
"group_concat",
"hex",
"ifnull",
"iif",
"instr",
"json",
"json_array",
"json_array_length",
"json_each",
"json_error_position",
"json_extract",
"json_group_array",
"json_group_object",
"json_insert",
"json_object",
"json_patch",
"json_quote",
"json_remove",
"json_replace",
"json_set",
"json_tree",
"json_type",
"json_valid",
"julianday",
"lag",
"last_insert_rowid",
"last_value",
"lead",
"length",
"like",
"likelihood",
"likely",
"ln",
"load_extension",
"log",
"log10",
"log2",
"lower",
"ltrim",
"max",
"min",
"mod",
"nth_value",
"ntile",
"nullif",
"percent_rank",
"pi",
"pow",
"power",
"printf",
"quote",
"radians",
"random",
"randomblob",
"rank",
"replace",
"round",
"row_number",
"rtrim",
"sign",
"sin",
"sinh",
"soundex",
"sqlite_compileoption_get",
"sqlite_compileoption_used",
"sqlite_offset",
"sqlite_source_id",
"sqlite_version",
"sqrt",
"strftime",
"substr",
"substring",
"sum",
"tan",
"tanh",
"time",
"total_changes",
"trim",
"trunc",
"typeof",
"unhex",
"unicode",
"unixepoch",
"unlikely",
"upper",
"zeroblob",
]
}
@mock.patch("superset.databases.api.get_available_engine_specs")
@mock.patch("superset.databases.api.app")
def test_available(self, app, get_available_engine_specs):

View File

@ -21,6 +21,7 @@ from typing import Optional
import pytest
from sqlalchemy.engine import create_engine
from superset.constants import TimeGrain
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
from tests.unit_tests.fixtures.common import dttm
@@ -47,13 +48,21 @@ def test_convert_dttm(
@pytest.mark.parametrize(
"dttm,grain,expected",
[
("2022-05-04T05:06:07.89Z", "PT1S", "2022-05-04 05:06:07"),
("2022-05-04T05:06:07.89Z", "PT1M", "2022-05-04 05:06:00"),
("2022-05-04T05:06:07.89Z", "PT1H", "2022-05-04 05:00:00"),
("2022-05-04T05:06:07.89Z", "P1D", "2022-05-04 00:00:00"),
("2022-05-04T05:06:07.89Z", "P1W", "2022-05-01 00:00:00"),
("2022-05-04T05:06:07.89Z", "P1M", "2022-05-01 00:00:00"),
("2022-05-04T05:06:07.89Z", "P1Y", "2022-01-01 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.SECOND, "2022-05-04 05:06:07"),
("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_SECONDS, "2022-05-04 05:06:05"),
("2022-05-04T05:06:37.89Z", TimeGrain.THIRTY_SECONDS, "2022-05-04 05:06:30"),
("2022-05-04T05:06:07.89Z", TimeGrain.MINUTE, "2022-05-04 05:06:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_MINUTES, "2022-05-04 05:05:00"),
("2022-05-04T05:36:07.89Z", TimeGrain.TEN_MINUTES, "2022-05-04 05:30:00"),
("2022-05-04T05:46:07.89Z", TimeGrain.FIFTEEN_MINUTES, "2022-05-04 05:45:00"),
("2022-05-04T05:36:07.89Z", TimeGrain.THIRTY_MINUTES, "2022-05-04 05:30:00"),
("2022-05-04T05:36:07.89Z", TimeGrain.HALF_HOUR, "2022-05-04 05:30:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.HOUR, "2022-05-04 05:00:00"),
("2022-05-04T07:06:07.89Z", TimeGrain.SIX_HOURS, "2022-05-04 06:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.DAY, "2022-05-04 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.WEEK, "2022-05-01 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.MONTH, "2022-05-01 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.YEAR, "2022-01-01 00:00:00"),
# ___________________________
# | May 2022 |
# |---------------------------|
@@ -61,27 +70,50 @@ def test_convert_dttm(
# |---+---+---+---+---+---+---|
# | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
# ---------------------------
# week ending Saturday
("2022-05-04T05:06:07.89Z", "P1W/1970-01-03T00:00:00Z", "2022-05-07 00:00:00"),
# week ending Sunday
("2022-05-04T05:06:07.89Z", "P1W/1970-01-04T00:00:00Z", "2022-05-08 00:00:00"),
# week starting Sunday
("2022-05-04T05:06:07.89Z", "1969-12-28T00:00:00Z/P1W", "2022-05-01 00:00:00"),
# week starting Monday
("2022-05-04T05:06:07.89Z", "1969-12-29T00:00:00Z/P1W", "2022-05-02 00:00:00"),
# tests for quarter
("2022-01-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
("2022-02-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
("2022-03-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
("2022-04-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
("2022-05-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
("2022-06-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
("2022-07-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
("2022-08-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
("2022-09-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
("2022-10-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
("2022-11-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
("2022-12-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
(
"2022-05-04T05:06:07.89Z",
TimeGrain.WEEK_ENDING_SATURDAY,
"2022-05-07 00:00:00",
),
(
"2022-05-04T05:06:07.89Z",
TimeGrain.WEEK_ENDING_SUNDAY,
"2022-05-08 00:00:00",
),
(
"2022-05-04T05:06:07.89Z",
TimeGrain.WEEK_STARTING_SUNDAY,
"2022-05-01 00:00:00",
),
(
"2022-05-04T05:06:07.89Z",
TimeGrain.WEEK_STARTING_MONDAY,
"2022-05-02 00:00:00",
),
("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
],
)
def test_time_grain_expressions(dttm: str, grain: str, expected: str) -> None: